In the log message when we have found an inconsistent ip address allocation,
[metze/ctdb/wip.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/* Timeout value derived from the takeover_timeout tunable, built with
   timeval_current_ofs().  Expands to an expression that reads a variable
   named `ctdb`, which therefore must be in scope wherever this is used. */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/* First argument handed to timeval_current_ofs() when the gratuitous arp
   timer is re-armed in ctdb_control_send_arp() */
#define CTDB_ARP_INTERVAL 1
/* ctdb_control_send_arp() stops and frees its state once it has run this
   many times */
#define CTDB_ARP_REPEAT   3
35
36 struct ctdb_takeover_arp {
37         struct ctdb_context *ctdb;
38         uint32_t count;
39         ctdb_sock_addr addr;
40         struct ctdb_tcp_array *tcparray;
41         struct ctdb_vnn *vnn;
42 };
43
44
45 /*
46   lists of tcp endpoints
47  */
48 struct ctdb_tcp_list {
49         struct ctdb_tcp_list *prev, *next;
50         struct ctdb_tcp_connection connection;
51 };
52
53 /*
54   list of clients to kill on IP release
55  */
56 struct ctdb_client_ip {
57         struct ctdb_client_ip *prev, *next;
58         struct ctdb_context *ctdb;
59         ctdb_sock_addr addr;
60         uint32_t client_id;
61 };
62
63
64 /*
65   send a gratuitous arp
66  */
67 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
68                                   struct timeval t, void *private_data)
69 {
70         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
71                                                         struct ctdb_takeover_arp);
72         int i, ret;
73         struct ctdb_tcp_array *tcparray;
74
75         ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface);
76         if (ret != 0) {
77                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
78         }
79
80         tcparray = arp->tcparray;
81         if (tcparray) {
82                 for (i=0;i<tcparray->num;i++) {
83                         struct ctdb_tcp_connection *tcon;
84
85                         tcon = &tcparray->connections[i];
86                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
87                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
88                                 ctdb_addr_to_str(&tcon->src_addr),
89                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
90                         ret = ctdb_sys_send_tcp(
91                                 &tcon->src_addr, 
92                                 &tcon->dst_addr,
93                                 0, 0, 0);
94                         if (ret != 0) {
95                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
96                                         ctdb_addr_to_str(&tcon->src_addr)));
97                         }
98                 }
99         }
100
101         arp->count++;
102
103         if (arp->count == CTDB_ARP_REPEAT) {
104                 talloc_free(arp);
105                 return;
106         }
107
108         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
109                         timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
110                         ctdb_control_send_arp, arp);
111 }
112
113 struct takeover_callback_state {
114         struct ctdb_req_control *c;
115         ctdb_sock_addr *addr;
116         struct ctdb_vnn *vnn;
117 };
118
119 /*
120   called when takeip event finishes
121  */
122 static void takeover_ip_callback(struct ctdb_context *ctdb, int status, 
123                                  void *private_data)
124 {
125         struct takeover_callback_state *state = 
126                 talloc_get_type(private_data, struct takeover_callback_state);
127         struct ctdb_takeover_arp *arp;
128         struct ctdb_tcp_array *tcparray;
129
130         if (status != 0) {
131                 if (status == -ETIME) {
132                         ctdb_ban_self(ctdb);
133                 }
134                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
135                         ctdb_addr_to_str(state->addr),
136                         state->vnn->iface));
137                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
138                 talloc_free(state);
139                 return;
140         }
141
142         if (!state->vnn->takeover_ctx) {
143                 state->vnn->takeover_ctx = talloc_new(state->vnn);
144                 if (!state->vnn->takeover_ctx) {
145                         goto failed;
146                 }
147         }
148
149         arp = talloc_zero(state->vnn->takeover_ctx, struct ctdb_takeover_arp);
150         if (!arp) goto failed;
151         
152         arp->ctdb = ctdb;
153         arp->addr = *state->addr;
154         arp->vnn  = state->vnn;
155
156         tcparray = state->vnn->tcp_array;
157         if (tcparray) {
158                 /* add all of the known tcp connections for this IP to the
159                    list of tcp connections to send tickle acks for */
160                 arp->tcparray = talloc_steal(arp, tcparray);
161
162                 state->vnn->tcp_array = NULL;
163                 state->vnn->tcp_update_needed = true;
164         }
165
166         event_add_timed(arp->ctdb->ev, state->vnn->takeover_ctx, 
167                         timeval_zero(), ctdb_control_send_arp, arp);
168
169         /* the control succeeded */
170         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
171         talloc_free(state);
172         return;
173
174 failed:
175         ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
176         talloc_free(state);
177         return;
178 }
179
180 /*
181   Find the vnn of the node that has a public ip address
182   returns -1 if the address is not known as a public address
183  */
184 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
185 {
186         struct ctdb_vnn *vnn;
187
188         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
189                 if (ctdb_same_ip(&vnn->public_address, addr)) {
190                         return vnn;
191                 }
192         }
193
194         return NULL;
195 }
196
197
198 /*
199   take over an ip address
200  */
201 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, 
202                                  struct ctdb_req_control *c,
203                                  TDB_DATA indata, 
204                                  bool *async_reply)
205 {
206         int ret;
207         struct takeover_callback_state *state;
208         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
209         struct ctdb_vnn *vnn;
210
211         /* update out vnn list */
212         vnn = find_public_ip_vnn(ctdb, &pip->addr);
213         if (vnn == NULL) {
214                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n", 
215                         ctdb_addr_to_str(&pip->addr)));
216                 return 0;
217         }
218         vnn->pnn = pip->pnn;
219
220         /* if our kernel already has this IP, do nothing */
221         if (ctdb_sys_have_ip(&pip->addr)) {
222                 return 0;
223         }
224
225         state = talloc(vnn, struct takeover_callback_state);
226         CTDB_NO_MEMORY(ctdb, state);
227
228         state->c = talloc_steal(ctdb, c);
229         state->addr = talloc(ctdb, ctdb_sock_addr);
230         CTDB_NO_MEMORY(ctdb, state->addr);
231
232         *state->addr = pip->addr;
233         state->vnn   = vnn;
234
235         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n", 
236                 ctdb_addr_to_str(&pip->addr),
237                 vnn->public_netmask_bits, 
238                 vnn->iface));
239
240         ret = ctdb_event_script_callback(ctdb, 
241                                          state, takeover_ip_callback, state,
242                                          false,
243                                          CTDB_EVENT_TAKE_IP,
244                                          "%s %s %u",
245                                          vnn->iface, 
246                                          talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
247                                          vnn->public_netmask_bits);
248
249         if (ret != 0) {
250                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
251                         ctdb_addr_to_str(&pip->addr),
252                         vnn->iface));
253                 talloc_free(state);
254                 return -1;
255         }
256
257         /* tell ctdb_control.c that we will be replying asynchronously */
258         *async_reply = true;
259
260         return 0;
261 }
262
263 /*
264   takeover an ip address old v4 style
265  */
266 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
267                                 struct ctdb_req_control *c,
268                                 TDB_DATA indata, 
269                                 bool *async_reply)
270 {
271         TDB_DATA data;
272         
273         data.dsize = sizeof(struct ctdb_public_ip);
274         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
275         CTDB_NO_MEMORY(ctdb, data.dptr);
276         
277         memcpy(data.dptr, indata.dptr, indata.dsize);
278         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
279 }
280
281 /*
282   kill any clients that are registered with a IP that is being released
283  */
284 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
285 {
286         struct ctdb_client_ip *ip;
287
288         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
289                 ctdb_addr_to_str(addr)));
290
291         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
292                 ctdb_sock_addr tmp_addr;
293
294                 tmp_addr = ip->addr;
295                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
296                         ip->client_id,
297                         ctdb_addr_to_str(&ip->addr)));
298
299                 if (ctdb_same_ip(&tmp_addr, addr)) {
300                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
301                                                                      ip->client_id, 
302                                                                      struct ctdb_client);
303                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
304                                 ip->client_id,
305                                 ctdb_addr_to_str(&ip->addr),
306                                 client->pid));
307
308                         if (client->pid != 0) {
309                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
310                                         (unsigned)client->pid,
311                                         ctdb_addr_to_str(addr),
312                                         ip->client_id));
313                                 kill(client->pid, SIGKILL);
314                         }
315                 }
316         }
317 }
318
319 /*
320   called when releaseip event finishes
321  */
322 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
323                                 void *private_data)
324 {
325         struct takeover_callback_state *state = 
326                 talloc_get_type(private_data, struct takeover_callback_state);
327         TDB_DATA data;
328
329         if (status == -ETIME) {
330                 ctdb_ban_self(ctdb);
331         }
332
333         /* send a message to all clients of this node telling them
334            that the cluster has been reconfigured and they should
335            release any sockets on this IP */
336         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
337         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
338         data.dsize = strlen((char *)data.dptr)+1;
339
340         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
341
342         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
343
344         /* kill clients that have registered with this IP */
345         release_kill_clients(ctdb, state->addr);
346         
347         /* the control succeeded */
348         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
349         talloc_free(state);
350 }
351
352 /*
353   release an ip address
354  */
355 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
356                                 struct ctdb_req_control *c,
357                                 TDB_DATA indata, 
358                                 bool *async_reply)
359 {
360         int ret;
361         struct takeover_callback_state *state;
362         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
363         struct ctdb_vnn *vnn;
364
365         /* update our vnn list */
366         vnn = find_public_ip_vnn(ctdb, &pip->addr);
367         if (vnn == NULL) {
368                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
369                         ctdb_addr_to_str(&pip->addr)));
370                 return 0;
371         }
372         vnn->pnn = pip->pnn;
373
374         /* stop any previous arps */
375         talloc_free(vnn->takeover_ctx);
376         vnn->takeover_ctx = NULL;
377
378         if (!ctdb_sys_have_ip(&pip->addr)) {
379                 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
380                         ctdb_addr_to_str(&pip->addr),
381                         vnn->public_netmask_bits, 
382                         vnn->iface));
383                 return 0;
384         }
385
386         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%u\n", 
387                 ctdb_addr_to_str(&pip->addr),
388                 vnn->public_netmask_bits, 
389                 vnn->iface,
390                 pip->pnn));
391
392         state = talloc(ctdb, struct takeover_callback_state);
393         CTDB_NO_MEMORY(ctdb, state);
394
395         state->c = talloc_steal(state, c);
396         state->addr = talloc(state, ctdb_sock_addr);       
397         CTDB_NO_MEMORY(ctdb, state->addr);
398         *state->addr = pip->addr;
399         state->vnn   = vnn;
400
401         ret = ctdb_event_script_callback(ctdb, 
402                                          state, release_ip_callback, state,
403                                          false,
404                                          CTDB_EVENT_RELEASE_IP,
405                                          "%s %s %u",
406                                          vnn->iface, 
407                                          talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
408                                          vnn->public_netmask_bits);
409         if (ret != 0) {
410                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
411                         ctdb_addr_to_str(&pip->addr),
412                         vnn->iface));
413                 talloc_free(state);
414                 return -1;
415         }
416
417         /* tell the control that we will be reply asynchronously */
418         *async_reply = true;
419         return 0;
420 }
421
422 /*
423   release an ip address old v4 style
424  */
425 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
426                                 struct ctdb_req_control *c,
427                                 TDB_DATA indata, 
428                                 bool *async_reply)
429 {
430         TDB_DATA data;
431         
432         data.dsize = sizeof(struct ctdb_public_ip);
433         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
434         CTDB_NO_MEMORY(ctdb, data.dptr);
435         
436         memcpy(data.dptr, indata.dptr, indata.dsize);
437         return ctdb_control_release_ip(ctdb, c, data, async_reply);
438 }
439
440
441 static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface)
442 {
443         struct ctdb_vnn      *vnn;
444
445         /* Verify that we dont have an entry for this ip yet */
446         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
447                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
448                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
449                                 ctdb_addr_to_str(addr)));
450                         return -1;
451                 }               
452         }
453
454         /* create a new vnn structure for this ip address */
455         vnn = talloc_zero(ctdb, struct ctdb_vnn);
456         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
457         vnn->iface = talloc_strdup(vnn, iface);
458         CTDB_NO_MEMORY(ctdb, vnn->iface);
459         vnn->public_address      = *addr;
460         vnn->public_netmask_bits = mask;
461         vnn->pnn                 = -1;
462         
463         DLIST_ADD(ctdb->vnn, vnn);
464
465         return 0;
466 }
467
468
469 /*
470   setup the event script directory
471 */
472 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
473 {
474         ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
475         CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
476         return 0;
477 }
478
479 /*
480   setup the public address lists from a file
481 */
482 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
483 {
484         char **lines;
485         int nlines;
486         int i;
487
488         lines = file_lines_load(alist, &nlines, ctdb);
489         if (lines == NULL) {
490                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
491                 return -1;
492         }
493         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
494                 nlines--;
495         }
496
497         for (i=0;i<nlines;i++) {
498                 unsigned mask;
499                 ctdb_sock_addr addr;
500                 const char *addrstr;
501                 const char *iface;
502                 char *tok, *line;
503
504                 line = lines[i];
505                 while ((*line == ' ') || (*line == '\t')) {
506                         line++;
507                 }
508                 if (*line == '#') {
509                         continue;
510                 }
511                 if (strcmp(line, "") == 0) {
512                         continue;
513                 }
514                 tok = strtok(line, " \t");
515                 addrstr = tok;
516                 tok = strtok(NULL, " \t");
517                 if (tok == NULL) {
518                         if (NULL == ctdb->default_public_interface) {
519                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
520                                          i+1));
521                                 talloc_free(lines);
522                                 return -1;
523                         }
524                         iface = ctdb->default_public_interface;
525                 } else {
526                         iface = tok;
527                 }
528
529                 if (!addrstr || !parse_ip_mask(addrstr, iface, &addr, &mask)) {
530                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
531                         talloc_free(lines);
532                         return -1;
533                 }
534                 if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
535                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
536                         talloc_free(lines);
537                         return -1;
538                 }
539         }
540
541         talloc_free(lines);
542         return 0;
543 }
544
545
546
547
548 struct ctdb_public_ip_list {
549         struct ctdb_public_ip_list *next;
550         uint32_t pnn;
551         ctdb_sock_addr addr;
552 };
553
554
555 /* Given a physical node, return the number of
556    public addresses that is currently assigned to this node.
557 */
558 static int node_ip_coverage(struct ctdb_context *ctdb, 
559         int32_t pnn,
560         struct ctdb_public_ip_list *ips)
561 {
562         int num=0;
563
564         for (;ips;ips=ips->next) {
565                 if (ips->pnn == pnn) {
566                         num++;
567                 }
568         }
569         return num;
570 }
571
572
573 /* Check if this is a public ip known to the node, i.e. can that
574    node takeover this ip ?
575 */
576 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
577                 struct ctdb_public_ip_list *ip)
578 {
579         struct ctdb_all_public_ips *public_ips;
580         int i;
581
582         public_ips = ctdb->nodes[pnn]->public_ips;
583
584         if (public_ips == NULL) {
585                 return -1;
586         }
587
588         for (i=0;i<public_ips->num;i++) {
589                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
590                         /* yes, this node can serve this public ip */
591                         return 0;
592                 }
593         }
594
595         return -1;
596 }
597
598
599 /* search the node lists list for a node to takeover this ip.
600    pick the node that currently are serving the least number of ips
601    so that the ips get spread out evenly.
602 */
603 static int find_takeover_node(struct ctdb_context *ctdb, 
604                 struct ctdb_node_map *nodemap, uint32_t mask, 
605                 struct ctdb_public_ip_list *ip,
606                 struct ctdb_public_ip_list *all_ips)
607 {
608         int pnn, min=0, num;
609         int i;
610
611         pnn    = -1;
612         for (i=0;i<nodemap->num;i++) {
613                 if (nodemap->nodes[i].flags & mask) {
614                         /* This node is not healty and can not be used to serve
615                            a public address 
616                         */
617                         continue;
618                 }
619
620                 /* verify that this node can serve this ip */
621                 if (can_node_serve_ip(ctdb, i, ip)) {
622                         /* no it couldnt   so skip to the next node */
623                         continue;
624                 }
625
626                 num = node_ip_coverage(ctdb, i, all_ips);
627                 /* was this the first node we checked ? */
628                 if (pnn == -1) {
629                         pnn = i;
630                         min  = num;
631                 } else {
632                         if (num < min) {
633                                 pnn = i;
634                                 min  = num;
635                         }
636                 }
637         }       
638         if (pnn == -1) {
639                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
640                         ctdb_addr_to_str(&ip->addr)));
641
642                 return -1;
643         }
644
645         ip->pnn = pnn;
646         return 0;
647 }
648
649 #define IP_KEYLEN       4
650 static uint32_t *ip_key(ctdb_sock_addr *ip)
651 {
652         static uint32_t key[IP_KEYLEN];
653
654         bzero(key, sizeof(key));
655
656         switch (ip->sa.sa_family) {
657         case AF_INET:
658                 key[3]  = htonl(ip->ip.sin_addr.s_addr);
659                 break;
660         case AF_INET6:
661                 key[0]  = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
662                 key[1]  = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
663                 key[2]  = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
664                 key[3]  = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
665                 break;
666         default:
667                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
668                 return key;
669         }
670
671         return key;
672 }
673
/* Insert callback given to trbt_insertarray32_callback(): always returns
   parm and ignores data.
   NOTE(review): presumably data is the entry already stored under the
   key, so that the newest entry replaces it - verify against rb_tree.
*/
static void *add_ip_callback(void *parm, void *data)
{
	(void)data;

	return parm;
}
678
679 void getips_count_callback(void *param, void *data)
680 {
681         struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
682         struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
683
684         new_ip->next = *ip_list;
685         *ip_list     = new_ip;
686 }
687
688 struct ctdb_public_ip_list *
689 create_merged_ip_list(struct ctdb_context *ctdb)
690 {
691         int i, j;
692         struct ctdb_public_ip_list *ip_list;
693         struct ctdb_all_public_ips *public_ips;
694
695         if (ctdb->ip_tree != NULL) {
696                 talloc_free(ctdb->ip_tree);
697                 ctdb->ip_tree = NULL;
698         }
699         ctdb->ip_tree = trbt_create(ctdb, 0);
700
701         for (i=0;i<ctdb->num_nodes;i++) {
702                 public_ips = ctdb->nodes[i]->public_ips;
703
704                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
705                         continue;
706                 }
707
708                 /* there were no public ips for this node */
709                 if (public_ips == NULL) {
710                         continue;
711                 }               
712
713                 for (j=0;j<public_ips->num;j++) {
714                         struct ctdb_public_ip_list *tmp_ip; 
715
716                         tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
717                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
718                         tmp_ip->pnn  = public_ips->ips[j].pnn;
719                         tmp_ip->addr = public_ips->ips[j].addr;
720                         tmp_ip->next = NULL;
721
722                         trbt_insertarray32_callback(ctdb->ip_tree,
723                                 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
724                                 add_ip_callback,
725                                 tmp_ip);
726                 }
727         }
728
729         ip_list = NULL;
730         trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
731
732         return ip_list;
733 }
734
735 /*
736   make any IP alias changes for public addresses that are necessary 
737  */
738 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
739 {
740         int i, num_healthy, retries;
741         struct ctdb_public_ip ip;
742         struct ctdb_public_ipv4 ipv4;
743         uint32_t mask;
744         struct ctdb_public_ip_list *all_ips, *tmp_ip;
745         int maxnode, maxnum=0, minnode, minnum=0, num;
746         TDB_DATA data;
747         struct timeval timeout;
748         struct client_async_data *async_data;
749         struct ctdb_client_control_state *state;
750         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
751
752
753         ZERO_STRUCT(ip);
754
755         /* Count how many completely healthy nodes we have */
756         num_healthy = 0;
757         for (i=0;i<nodemap->num;i++) {
758                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
759                         num_healthy++;
760                 }
761         }
762
763         if (num_healthy > 0) {
764                 /* We have healthy nodes, so only consider them for 
765                    serving public addresses
766                 */
767                 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
768         } else {
769                 /* We didnt have any completely healthy nodes so
770                    use "disabled" nodes as a fallback
771                 */
772                 mask = NODE_FLAGS_INACTIVE;
773         }
774
775         /* since nodes only know about those public addresses that
776            can be served by that particular node, no single node has
777            a full list of all public addresses that exist in the cluster.
778            Walk over all node structures and create a merged list of
779            all public addresses that exist in the cluster.
780
781            keep the tree of ips around as ctdb->ip_tree
782         */
783         all_ips = create_merged_ip_list(ctdb);
784
785         /* If we want deterministic ip allocations, i.e. that the ip addresses
786            will always be allocated the same way for a specific set of
787            available/unavailable nodes.
788         */
789         if (1 == ctdb->tunable.deterministic_public_ips) {              
790                 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
791                 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
792                         tmp_ip->pnn = i%nodemap->num;
793                 }
794         }
795
796
797         /* mark all public addresses with a masked node as being served by
798            node -1
799         */
800         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
801                 if (tmp_ip->pnn == -1) {
802                         continue;
803                 }
804                 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
805                         tmp_ip->pnn = -1;
806                 }
807         }
808
809         /* verify that the assigned nodes can serve that public ip
810            and set it to -1 if not
811         */
812         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
813                 if (tmp_ip->pnn == -1) {
814                         continue;
815                 }
816                 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
817                         /* this node can not serve this ip. */
818                         tmp_ip->pnn = -1;
819                 }
820         }
821
822
823         /* now we must redistribute all public addresses with takeover node
824            -1 among the nodes available
825         */
826         retries = 0;
827 try_again:
828         /* loop over all ip's and find a physical node to cover for 
829            each unassigned ip.
830         */
831         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
832                 if (tmp_ip->pnn == -1) {
833                         if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
834                                 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
835                                         ctdb_addr_to_str(&tmp_ip->addr)));
836                         }
837                 }
838         }
839
840         /* If we dont want ips to fail back after a node becomes healthy
841            again, we wont even try to reallocat the ip addresses so that
842            they are evenly spread out.
843            This can NOT be used at the same time as DeterministicIPs !
844         */
845         if (1 == ctdb->tunable.no_ip_failback) {
846                 if (1 == ctdb->tunable.deterministic_public_ips) {
847                         DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
848                 }
849                 goto finished;
850         }
851
852
853         /* now, try to make sure the ip adresses are evenly distributed
854            across the node.
855            for each ip address, loop over all nodes that can serve this
856            ip and make sure that the difference between the node
857            serving the most and the node serving the least ip's are not greater
858            than 1.
859         */
860         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
861                 if (tmp_ip->pnn == -1) {
862                         continue;
863                 }
864
865                 /* Get the highest and lowest number of ips's served by any 
866                    valid node which can serve this ip.
867                 */
868                 maxnode = -1;
869                 minnode = -1;
870                 for (i=0;i<nodemap->num;i++) {
871                         if (nodemap->nodes[i].flags & mask) {
872                                 continue;
873                         }
874
875                         /* only check nodes that can actually serve this ip */
876                         if (can_node_serve_ip(ctdb, i, tmp_ip)) {
877                                 /* no it couldnt   so skip to the next node */
878                                 continue;
879                         }
880
881                         num = node_ip_coverage(ctdb, i, all_ips);
882                         if (maxnode == -1) {
883                                 maxnode = i;
884                                 maxnum  = num;
885                         } else {
886                                 if (num > maxnum) {
887                                         maxnode = i;
888                                         maxnum  = num;
889                                 }
890                         }
891                         if (minnode == -1) {
892                                 minnode = i;
893                                 minnum  = num;
894                         } else {
895                                 if (num < minnum) {
896                                         minnode = i;
897                                         minnum  = num;
898                                 }
899                         }
900                 }
901                 if (maxnode == -1) {
902                         DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
903                                 ctdb_addr_to_str(&tmp_ip->addr)));
904
905                         continue;
906                 }
907
908                 /* If we want deterministic IPs then dont try to reallocate 
909                    them to spread out the load.
910                 */
911                 if (1 == ctdb->tunable.deterministic_public_ips) {
912                         continue;
913                 }
914
915                 /* if the spread between the smallest and largest coverage by
916                    a node is >=2 we steal one of the ips from the node with
917                    most coverage to even things out a bit.
918                    try to do this at most 5 times  since we dont want to spend
919                    too much time balancing the ip coverage.
920                 */
921                 if ( (maxnum > minnum+1)
922                   && (retries < 5) ){
923                         struct ctdb_public_ip_list *tmp;
924
925                         /* mark one of maxnode's vnn's as unassigned and try
926                            again
927                         */
928                         for (tmp=all_ips;tmp;tmp=tmp->next) {
929                                 if (tmp->pnn == maxnode) {
930                                         tmp->pnn = -1;
931                                         retries++;
932                                         goto try_again;
933                                 }
934                         }
935                 }
936         }
937
938
939         /* finished distributing the public addresses, now just send the 
940            info out to the nodes
941         */
942 finished:
943
944         /* at this point ->pnn is the node which will own each IP
945            or -1 if there is no node that can cover this ip
946         */
947
948         /* now tell all nodes to delete any alias that they should not
949            have.  This will be a NOOP on nodes that don't currently
950            hold the given alias */
951         async_data = talloc_zero(tmp_ctx, struct client_async_data);
952         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
953
954         for (i=0;i<nodemap->num;i++) {
955                 /* don't talk to unconnected nodes, but do talk to banned nodes */
956                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
957                         continue;
958                 }
959
960                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
961                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
962                                 /* This node should be serving this
963                                    vnn so dont tell it to release the ip
964                                 */
965                                 continue;
966                         }
967                         if (tmp_ip->addr.sa.sa_family == AF_INET) {
968                                 ipv4.pnn = tmp_ip->pnn;
969                                 ipv4.sin = tmp_ip->addr.ip;
970
971                                 timeout = TAKEOVER_TIMEOUT();
972                                 data.dsize = sizeof(ipv4);
973                                 data.dptr  = (uint8_t *)&ipv4;
974                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
975                                                 0, CTDB_CONTROL_RELEASE_IPv4, 0,
976                                                 data, async_data,
977                                                 &timeout, NULL);
978                         } else {
979                                 ip.pnn  = tmp_ip->pnn;
980                                 ip.addr = tmp_ip->addr;
981
982                                 timeout = TAKEOVER_TIMEOUT();
983                                 data.dsize = sizeof(ip);
984                                 data.dptr  = (uint8_t *)&ip;
985                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
986                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
987                                                 data, async_data,
988                                                 &timeout, NULL);
989                         }
990
991                         if (state == NULL) {
992                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
993                                 talloc_free(tmp_ctx);
994                                 return -1;
995                         }
996                 
997                         ctdb_client_async_add(async_data, state);
998                 }
999         }
1000         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1001                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1002                 talloc_free(tmp_ctx);
1003                 return -1;
1004         }
1005         talloc_free(async_data);
1006
1007
1008         /* tell all nodes to get their own IPs */
1009         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1010         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1011         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1012                 if (tmp_ip->pnn == -1) {
1013                         /* this IP won't be taken over */
1014                         continue;
1015                 }
1016
1017                 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1018                         ipv4.pnn = tmp_ip->pnn;
1019                         ipv4.sin = tmp_ip->addr.ip;
1020
1021                         timeout = TAKEOVER_TIMEOUT();
1022                         data.dsize = sizeof(ipv4);
1023                         data.dptr  = (uint8_t *)&ipv4;
1024                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1025                                         0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1026                                         data, async_data,
1027                                         &timeout, NULL);
1028                 } else {
1029                         ip.pnn  = tmp_ip->pnn;
1030                         ip.addr = tmp_ip->addr;
1031
1032                         timeout = TAKEOVER_TIMEOUT();
1033                         data.dsize = sizeof(ip);
1034                         data.dptr  = (uint8_t *)&ip;
1035                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1036                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
1037                                         data, async_data,
1038                                         &timeout, NULL);
1039                 }
1040                 if (state == NULL) {
1041                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1042                         talloc_free(tmp_ctx);
1043                         return -1;
1044                 }
1045                 
1046                 ctdb_client_async_add(async_data, state);
1047         }
1048         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1049                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1050                 talloc_free(tmp_ctx);
1051                 return -1;
1052         }
1053
1054         talloc_free(tmp_ctx);
1055         return 0;
1056 }
1057
1058
1059 /*
1060   destroy a ctdb_client_ip structure
1061  */
1062 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1063 {
1064         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1065                 ctdb_addr_to_str(&ip->addr),
1066                 ntohs(ip->addr.ip.sin_port),
1067                 ip->client_id));
1068
1069         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1070         return 0;
1071 }
1072
1073 /*
1074   called by a client to inform us of a TCP connection that it is managing
1075   that should tickled with an ACK when IP takeover is done
1076   we handle both the old ipv4 style of packets as well as the new ipv4/6
1077   pdus.
1078  */
1079 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1080                                 TDB_DATA indata)
1081 {
1082         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1083         struct ctdb_control_tcp *old_addr = NULL;
1084         struct ctdb_control_tcp_addr new_addr;
1085         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1086         struct ctdb_tcp_list *tcp;
1087         struct ctdb_control_tcp_vnn t;
1088         int ret;
1089         TDB_DATA data;
1090         struct ctdb_client_ip *ip;
1091         struct ctdb_vnn *vnn;
1092         ctdb_sock_addr addr;
1093
1094         switch (indata.dsize) {
1095         case sizeof(struct ctdb_control_tcp):
1096                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1097                 ZERO_STRUCT(new_addr);
1098                 tcp_sock = &new_addr;
1099                 tcp_sock->src.ip  = old_addr->src;
1100                 tcp_sock->dest.ip = old_addr->dest;
1101                 break;
1102         case sizeof(struct ctdb_control_tcp_addr):
1103                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1104                 break;
1105         default:
1106                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1107                                  "to ctdb_control_tcp_client. size was %d but "
1108                                  "only allowed sizes are %lu and %lu\n",
1109                                  (int)indata.dsize,
1110                                  (long unsigned)sizeof(struct ctdb_control_tcp),
1111                                  (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1112                 return -1;
1113         }
1114
1115         addr = tcp_sock->src;
1116         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1117         addr = tcp_sock->dest;
1118         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1119
1120         ZERO_STRUCT(addr);
1121         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1122         vnn = find_public_ip_vnn(ctdb, &addr);
1123         if (vnn == NULL) {
1124                 switch (addr.sa.sa_family) {
1125                 case AF_INET:
1126                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1127                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1128                                         ctdb_addr_to_str(&addr)));
1129                         }
1130                         break;
1131                 case AF_INET6:
1132                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1133                                 ctdb_addr_to_str(&addr)));
1134                         break;
1135                 default:
1136                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1137                 }
1138
1139                 return 0;
1140         }
1141
1142         if (vnn->pnn != ctdb->pnn) {
1143                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1144                         ctdb_addr_to_str(&addr),
1145                         client_id, client->pid));
1146                 /* failing this call will tell smbd to die */
1147                 return -1;
1148         }
1149
1150         ip = talloc(client, struct ctdb_client_ip);
1151         CTDB_NO_MEMORY(ctdb, ip);
1152
1153         ip->ctdb      = ctdb;
1154         ip->addr      = addr;
1155         ip->client_id = client_id;
1156         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1157         DLIST_ADD(ctdb->client_ip_list, ip);
1158
1159         tcp = talloc(client, struct ctdb_tcp_list);
1160         CTDB_NO_MEMORY(ctdb, tcp);
1161
1162         tcp->connection.src_addr = tcp_sock->src;
1163         tcp->connection.dst_addr = tcp_sock->dest;
1164
1165         DLIST_ADD(client->tcp_list, tcp);
1166
1167         t.src  = tcp_sock->src;
1168         t.dest = tcp_sock->dest;
1169
1170         data.dptr = (uint8_t *)&t;
1171         data.dsize = sizeof(t);
1172
1173         switch (addr.sa.sa_family) {
1174         case AF_INET:
1175                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1176                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1177                         ctdb_addr_to_str(&tcp_sock->src),
1178                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1179                 break;
1180         case AF_INET6:
1181                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1182                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1183                         ctdb_addr_to_str(&tcp_sock->src),
1184                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1185                 break;
1186         default:
1187                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1188         }
1189
1190
1191         /* tell all nodes about this tcp connection */
1192         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1193                                        CTDB_CONTROL_TCP_ADD,
1194                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1195         if (ret != 0) {
1196                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1197                 return -1;
1198         }
1199
1200         return 0;
1201 }
1202
1203 /*
1204   find a tcp address on a list
1205  */
1206 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1207                                            struct ctdb_tcp_connection *tcp)
1208 {
1209         int i;
1210
1211         if (array == NULL) {
1212                 return NULL;
1213         }
1214
1215         for (i=0;i<array->num;i++) {
1216                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1217                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1218                         return &array->connections[i];
1219                 }
1220         }
1221         return NULL;
1222 }
1223
1224 /*
1225   called by a daemon to inform us of a TCP connection that one of its
1226   clients managing that should tickled with an ACK when IP takeover is
1227   done
1228  */
1229 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1230 {
1231         struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1232         struct ctdb_tcp_array *tcparray;
1233         struct ctdb_tcp_connection tcp;
1234         struct ctdb_vnn *vnn;
1235
1236         vnn = find_public_ip_vnn(ctdb, &p->dest);
1237         if (vnn == NULL) {
1238                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1239                         ctdb_addr_to_str(&p->dest)));
1240
1241                 return -1;
1242         }
1243
1244
1245         tcparray = vnn->tcp_array;
1246
1247         /* If this is the first tickle */
1248         if (tcparray == NULL) {
1249                 tcparray = talloc_size(ctdb->nodes, 
1250                         offsetof(struct ctdb_tcp_array, connections) +
1251                         sizeof(struct ctdb_tcp_connection) * 1);
1252                 CTDB_NO_MEMORY(ctdb, tcparray);
1253                 vnn->tcp_array = tcparray;
1254
1255                 tcparray->num = 0;
1256                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1257                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1258
1259                 tcparray->connections[tcparray->num].src_addr = p->src;
1260                 tcparray->connections[tcparray->num].dst_addr = p->dest;
1261                 tcparray->num++;
1262                 return 0;
1263         }
1264
1265
1266         /* Do we already have this tickle ?*/
1267         tcp.src_addr = p->src;
1268         tcp.dst_addr = p->dest;
1269         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1270                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1271                         ctdb_addr_to_str(&tcp.dst_addr),
1272                         ntohs(tcp.dst_addr.ip.sin_port),
1273                         vnn->pnn));
1274                 return 0;
1275         }
1276
1277         /* A new tickle, we must add it to the array */
1278         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1279                                         struct ctdb_tcp_connection,
1280                                         tcparray->num+1);
1281         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1282
1283         vnn->tcp_array = tcparray;
1284         tcparray->connections[tcparray->num].src_addr = p->src;
1285         tcparray->connections[tcparray->num].dst_addr = p->dest;
1286         tcparray->num++;
1287                                 
1288         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1289                 ctdb_addr_to_str(&tcp.dst_addr),
1290                 ntohs(tcp.dst_addr.ip.sin_port),
1291                 vnn->pnn));
1292
1293         return 0;
1294 }
1295
1296
1297 /*
1298   called by a daemon to inform us of a TCP connection that one of its
1299   clients managing that should tickled with an ACK when IP takeover is
1300   done
1301  */
1302 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1303 {
1304         struct ctdb_tcp_connection *tcpp;
1305         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1306
1307         if (vnn == NULL) {
1308                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1309                         ctdb_addr_to_str(&conn->dst_addr)));
1310                 return;
1311         }
1312
1313         /* if the array is empty we cant remove it
1314            and we dont need to do anything
1315          */
1316         if (vnn->tcp_array == NULL) {
1317                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1318                         ctdb_addr_to_str(&conn->dst_addr),
1319                         ntohs(conn->dst_addr.ip.sin_port)));
1320                 return;
1321         }
1322
1323
1324         /* See if we know this connection
1325            if we dont know this connection  then we dont need to do anything
1326          */
1327         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1328         if (tcpp == NULL) {
1329                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1330                         ctdb_addr_to_str(&conn->dst_addr),
1331                         ntohs(conn->dst_addr.ip.sin_port)));
1332                 return;
1333         }
1334
1335
1336         /* We need to remove this entry from the array.
1337            Instead of allocating a new array and copying data to it
1338            we cheat and just copy the last entry in the existing array
1339            to the entry that is to be removed and just shring the 
1340            ->num field
1341          */
1342         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1343         vnn->tcp_array->num--;
1344
1345         /* If we deleted the last entry we also need to remove the entire array
1346          */
1347         if (vnn->tcp_array->num == 0) {
1348                 talloc_free(vnn->tcp_array);
1349                 vnn->tcp_array = NULL;
1350         }               
1351
1352         vnn->tcp_update_needed = true;
1353
1354         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1355                 ctdb_addr_to_str(&conn->src_addr),
1356                 ntohs(conn->src_addr.ip.sin_port)));
1357 }
1358
1359
/*
  called when a daemon restarts.
  The intention is to immediately send all tickles for all public
  addresses we are serving to the new node.  That is not implemented yet:
  both arguments are ignored and 0 is returned.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
        /* XXX here we should send all tickles we are serving to the new node */
        const int32_t nothing_to_do = 0;

        return nothing_to_do;
}
1369
1370
1371 /*
1372   called when a client structure goes away - hook to remove
1373   elements from the tcp_list in all daemons
1374  */
1375 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1376 {
1377         while (client->tcp_list) {
1378                 struct ctdb_tcp_list *tcp = client->tcp_list;
1379                 DLIST_REMOVE(client->tcp_list, tcp);
1380                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1381         }
1382 }
1383
1384
1385 /*
1386   release all IPs on shutdown
1387  */
1388 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1389 {
1390         struct ctdb_vnn *vnn;
1391
1392         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1393                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1394                         continue;
1395                 }
1396                 if (vnn->pnn == ctdb->pnn) {
1397                         vnn->pnn = -1;
1398                 }
1399                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1400                                   vnn->iface, 
1401                                   talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
1402                                   vnn->public_netmask_bits);
1403                 release_kill_clients(ctdb, &vnn->public_address);
1404         }
1405 }
1406
1407
1408 /*
1409   get list of public IPs
1410  */
1411 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1412                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1413 {
1414         int i, num, len;
1415         struct ctdb_all_public_ips *ips;
1416         struct ctdb_vnn *vnn;
1417
1418         /* count how many public ip structures we have */
1419         num = 0;
1420         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1421                 num++;
1422         }
1423
1424         len = offsetof(struct ctdb_all_public_ips, ips) + 
1425                 num*sizeof(struct ctdb_public_ip);
1426         ips = talloc_zero_size(outdata, len);
1427         CTDB_NO_MEMORY(ctdb, ips);
1428
1429         outdata->dsize = len;
1430         outdata->dptr  = (uint8_t *)ips;
1431
1432         ips->num = num;
1433         i = 0;
1434         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1435                 ips->ips[i].pnn  = vnn->pnn;
1436                 ips->ips[i].addr = vnn->public_address;
1437                 i++;
1438         }
1439
1440         return 0;
1441 }
1442
1443
1444 /*
1445   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
1446  */
1447 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
1448                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1449 {
1450         int i, num, len;
1451         struct ctdb_all_public_ipsv4 *ips;
1452         struct ctdb_vnn *vnn;
1453
1454         /* count how many public ip structures we have */
1455         num = 0;
1456         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1457                 if (vnn->public_address.sa.sa_family != AF_INET) {
1458                         continue;
1459                 }
1460                 num++;
1461         }
1462
1463         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
1464                 num*sizeof(struct ctdb_public_ipv4);
1465         ips = talloc_zero_size(outdata, len);
1466         CTDB_NO_MEMORY(ctdb, ips);
1467
1468         outdata->dsize = len;
1469         outdata->dptr  = (uint8_t *)ips;
1470
1471         ips->num = num;
1472         i = 0;
1473         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1474                 if (vnn->public_address.sa.sa_family != AF_INET) {
1475                         continue;
1476                 }
1477                 ips->ips[i].pnn = vnn->pnn;
1478                 ips->ips[i].sin = vnn->public_address.ip;
1479                 i++;
1480         }
1481
1482         return 0;
1483 }
1484
1485
1486 /* 
1487    structure containing the listening socket and the list of tcp connections
1488    that the ctdb daemon is to kill
1489 */
1490 struct ctdb_kill_tcp {
1491         struct ctdb_vnn *vnn;
1492         struct ctdb_context *ctdb;
1493         int capture_fd;
1494         struct fd_event *fde;
1495         trbt_tree_t *connections;
1496         void *private_data;
1497 };
1498
1499 /*
1500   a tcp connection that is to be killed
1501  */
1502 struct ctdb_killtcp_con {
1503         ctdb_sock_addr src_addr;
1504         ctdb_sock_addr dst_addr;
1505         int count;
1506         struct ctdb_kill_tcp *killtcp;
1507 };
1508
1509 /* this function is used to create a key to represent this socketpair
1510    in the killtcp tree.
1511    this key is used to insert and lookup matching socketpairs that are
1512    to be tickled and RST
1513 */
1514 #define KILLTCP_KEYLEN  10
1515 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
1516 {
1517         static uint32_t key[KILLTCP_KEYLEN];
1518
1519         bzero(key, sizeof(key));
1520
1521         if (src->sa.sa_family != dst->sa.sa_family) {
1522                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
1523                 return key;
1524         }
1525         
1526         switch (src->sa.sa_family) {
1527         case AF_INET:
1528                 key[0]  = dst->ip.sin_addr.s_addr;
1529                 key[1]  = src->ip.sin_addr.s_addr;
1530                 key[2]  = dst->ip.sin_port;
1531                 key[3]  = src->ip.sin_port;
1532                 break;
1533         case AF_INET6:
1534                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
1535                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
1536                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
1537                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
1538                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
1539                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
1540                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
1541                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
1542                 key[8]  = dst->ip6.sin6_port;
1543                 key[9]  = src->ip6.sin6_port;
1544                 break;
1545         default:
1546                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
1547                 return key;
1548         }
1549
1550         return key;
1551 }
1552
1553 /*
1554   called when we get a read event on the raw socket
1555  */
1556 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
1557                                 uint16_t flags, void *private_data)
1558 {
1559         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1560         struct ctdb_killtcp_con *con;
1561         ctdb_sock_addr src, dst;
1562         uint32_t ack_seq, seq;
1563
1564         if (!(flags & EVENT_FD_READ)) {
1565                 return;
1566         }
1567
1568         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
1569                                 killtcp->private_data,
1570                                 &src, &dst,
1571                                 &ack_seq, &seq) != 0) {
1572                 /* probably a non-tcp ACK packet */
1573                 return;
1574         }
1575
1576         /* check if we have this guy in our list of connections
1577            to kill
1578         */
1579         con = trbt_lookuparray32(killtcp->connections, 
1580                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
1581         if (con == NULL) {
1582                 /* no this was some other packet we can just ignore */
1583                 return;
1584         }
1585
1586         /* This one has been tickled !
1587            now reset him and remove him from the list.
1588          */
1589         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
1590                 ntohs(con->dst_addr.ip.sin_port),
1591                 ctdb_addr_to_str(&con->src_addr),
1592                 ntohs(con->src_addr.ip.sin_port)));
1593
1594         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
1595         talloc_free(con);
1596 }
1597
1598
1599 /* when traversing the list of all tcp connections to send tickle acks to
1600    (so that we can capture the ack coming back and kill the connection
1601     by a RST)
1602    this callback is called for each connection we are currently trying to kill
1603 */
1604 static void tickle_connection_traverse(void *param, void *data)
1605 {
1606         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
1607
1608         /* have tried too many times, just give up */
1609         if (con->count >= 5) {
1610                 talloc_free(con);
1611                 return;
1612         }
1613
1614         /* othervise, try tickling it again */
1615         con->count++;
1616         ctdb_sys_send_tcp(
1617                 (ctdb_sock_addr *)&con->dst_addr,
1618                 (ctdb_sock_addr *)&con->src_addr,
1619                 0, 0, 0);
1620 }
1621
1622
1623 /* 
1624    called every second until all sentenced connections have been reset
1625  */
1626 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
1627                                               struct timeval t, void *private_data)
1628 {
1629         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1630
1631
1632         /* loop over all connections sending tickle ACKs */
1633         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
1634
1635
1636         /* If there are no more connections to kill we can remove the
1637            entire killtcp structure
1638          */
1639         if ( (killtcp->connections == NULL) || 
1640              (killtcp->connections->root == NULL) ) {
1641                 talloc_free(killtcp);
1642                 return;
1643         }
1644
1645         /* try tickling them again in a seconds time
1646          */
1647         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
1648                         ctdb_tickle_sentenced_connections, killtcp);
1649 }
1650
1651 /*
1652   destroy the killtcp structure
1653  */
1654 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
1655 {
1656         killtcp->vnn->killtcp = NULL;
1657         return 0;
1658 }
1659
1660
/* insert callback for the tree of connections to kill.

   parm is the new connection structure, data is whatever is already
   stored under the same key (if anything).  The new structure always
   wins and is handed back to be stored.

   The old structure is deliberately not freed here: it is talloc_stolen
   by the same node in the tree and is deleted when the new data is
   deleted.
*/
static void *add_killtcp_callback(void *parm, void *data)
{
        void *replacement = parm;

        return replacement;
}
1672
1673 /*
1674   add a tcp socket to the list of connections we want to RST
1675  */
1676 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
1677                                        ctdb_sock_addr *s,
1678                                        ctdb_sock_addr *d)
1679 {
1680         ctdb_sock_addr src, dst;
1681         struct ctdb_kill_tcp *killtcp;
1682         struct ctdb_killtcp_con *con;
1683         struct ctdb_vnn *vnn;
1684
1685         ctdb_canonicalize_ip(s, &src);
1686         ctdb_canonicalize_ip(d, &dst);
1687
1688         vnn = find_public_ip_vnn(ctdb, &dst);
1689         if (vnn == NULL) {
1690                 vnn = find_public_ip_vnn(ctdb, &src);
1691         }
1692         if (vnn == NULL) {
1693                 /* if it is not a public ip   it could be our 'single ip' */
1694                 if (ctdb->single_ip_vnn) {
1695                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
1696                                 vnn = ctdb->single_ip_vnn;
1697                         }
1698                 }
1699         }
1700         if (vnn == NULL) {
1701                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
1702                 return -1;
1703         }
1704
1705         killtcp = vnn->killtcp;
1706         
1707         /* If this is the first connection to kill we must allocate
1708            a new structure
1709          */
1710         if (killtcp == NULL) {
1711                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
1712                 CTDB_NO_MEMORY(ctdb, killtcp);
1713
1714                 killtcp->vnn         = vnn;
1715                 killtcp->ctdb        = ctdb;
1716                 killtcp->capture_fd  = -1;
1717                 killtcp->connections = trbt_create(killtcp, 0);
1718
1719                 vnn->killtcp         = killtcp;
1720                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
1721         }
1722
1723
1724
1725         /* create a structure that describes this connection we want to
1726            RST and store it in killtcp->connections
1727         */
1728         con = talloc(killtcp, struct ctdb_killtcp_con);
1729         CTDB_NO_MEMORY(ctdb, con);
1730         con->src_addr = src;
1731         con->dst_addr = dst;
1732         con->count    = 0;
1733         con->killtcp  = killtcp;
1734
1735
1736         trbt_insertarray32_callback(killtcp->connections,
1737                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
1738                         add_killtcp_callback, con);
1739
1740         /* 
1741            If we dont have a socket to listen on yet we must create it
1742          */
1743         if (killtcp->capture_fd == -1) {
1744                 killtcp->capture_fd = ctdb_sys_open_capture_socket(vnn->iface, &killtcp->private_data);
1745                 if (killtcp->capture_fd == -1) {
1746                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing socket for killtcp\n"));
1747                         goto failed;
1748                 }
1749         }
1750
1751
1752         if (killtcp->fde == NULL) {
1753                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
1754                                             EVENT_FD_READ | EVENT_FD_AUTOCLOSE, 
1755                                             capture_tcp_handler, killtcp);
1756
1757                 /* We also need to set up some events to tickle all these connections
1758                    until they are all reset
1759                 */
1760                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
1761                                 ctdb_tickle_sentenced_connections, killtcp);
1762         }
1763
1764         /* tickle him once now */
1765         ctdb_sys_send_tcp(
1766                 &con->dst_addr,
1767                 &con->src_addr,
1768                 0, 0, 0);
1769
1770         return 0;
1771
1772 failed:
1773         talloc_free(vnn->killtcp);
1774         vnn->killtcp = NULL;
1775         return -1;
1776 }
1777
1778 /*
1779   kill a TCP connection.
1780  */
1781 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
1782 {
1783         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
1784
1785         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
1786 }
1787
1788 /*
1789   called by a daemon to inform us of the entire list of TCP tickles for
1790   a particular public address.
1791   this control should only be sent by the node that is currently serving
1792   that public address.
1793  */
1794 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1795 {
1796         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
1797         struct ctdb_tcp_array *tcparray;
1798         struct ctdb_vnn *vnn;
1799
1800         /* We must at least have tickles.num or else we cant verify the size
1801            of the received data blob
1802          */
1803         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
1804                                         tickles.connections)) {
1805                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
1806                 return -1;
1807         }
1808
1809         /* verify that the size of data matches what we expect */
1810         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
1811                                 tickles.connections)
1812                          + sizeof(struct ctdb_tcp_connection)
1813                                  * list->tickles.num) {
1814                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
1815                 return -1;
1816         }       
1817
1818         vnn = find_public_ip_vnn(ctdb, &list->addr);
1819         if (vnn == NULL) {
1820                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
1821                         ctdb_addr_to_str(&list->addr)));
1822
1823                 return 1;
1824         }
1825
1826         /* remove any old ticklelist we might have */
1827         talloc_free(vnn->tcp_array);
1828         vnn->tcp_array = NULL;
1829
1830         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
1831         CTDB_NO_MEMORY(ctdb, tcparray);
1832
1833         tcparray->num = list->tickles.num;
1834
1835         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
1836         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1837
1838         memcpy(tcparray->connections, &list->tickles.connections[0], 
1839                sizeof(struct ctdb_tcp_connection)*tcparray->num);
1840
1841         /* We now have a new fresh tickle list array for this vnn */
1842         vnn->tcp_array = talloc_steal(vnn, tcparray);
1843         
1844         return 0;
1845 }
1846
1847 /*
1848   called to return the full list of tickles for the puclic address associated 
1849   with the provided vnn
1850  */
1851 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1852 {
1853         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1854         struct ctdb_control_tcp_tickle_list *list;
1855         struct ctdb_tcp_array *tcparray;
1856         int num;
1857         struct ctdb_vnn *vnn;
1858
1859         vnn = find_public_ip_vnn(ctdb, addr);
1860         if (vnn == NULL) {
1861                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
1862                         ctdb_addr_to_str(addr)));
1863
1864                 return 1;
1865         }
1866
1867         tcparray = vnn->tcp_array;
1868         if (tcparray) {
1869                 num = tcparray->num;
1870         } else {
1871                 num = 0;
1872         }
1873
1874         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
1875                                 tickles.connections)
1876                         + sizeof(struct ctdb_tcp_connection) * num;
1877
1878         outdata->dptr  = talloc_size(outdata, outdata->dsize);
1879         CTDB_NO_MEMORY(ctdb, outdata->dptr);
1880         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
1881
1882         list->addr = *addr;
1883         list->tickles.num = num;
1884         if (num) {
1885                 memcpy(&list->tickles.connections[0], tcparray->connections, 
1886                         sizeof(struct ctdb_tcp_connection) * num);
1887         }
1888
1889         return 0;
1890 }
1891
1892
1893 /*
1894   set the list of all tcp tickles for a public address
1895  */
1896 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
1897                               struct timeval timeout, uint32_t destnode, 
1898                               ctdb_sock_addr *addr,
1899                               struct ctdb_tcp_array *tcparray)
1900 {
1901         int ret, num;
1902         TDB_DATA data;
1903         struct ctdb_control_tcp_tickle_list *list;
1904
1905         if (tcparray) {
1906                 num = tcparray->num;
1907         } else {
1908                 num = 0;
1909         }
1910
1911         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
1912                                 tickles.connections) +
1913                         sizeof(struct ctdb_tcp_connection) * num;
1914         data.dptr = talloc_size(ctdb, data.dsize);
1915         CTDB_NO_MEMORY(ctdb, data.dptr);
1916
1917         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
1918         list->addr = *addr;
1919         list->tickles.num = num;
1920         if (tcparray) {
1921                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
1922         }
1923
1924         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1925                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1926                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1927         if (ret != 0) {
1928                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1929                 return -1;
1930         }
1931
1932         talloc_free(data.dptr);
1933
1934         return ret;
1935 }
1936
1937
1938 /*
1939   perform tickle updates if required
1940  */
1941 static void ctdb_update_tcp_tickles(struct event_context *ev, 
1942                                 struct timed_event *te, 
1943                                 struct timeval t, void *private_data)
1944 {
1945         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1946         int ret;
1947         struct ctdb_vnn *vnn;
1948
1949         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1950                 /* we only send out updates for public addresses that 
1951                    we have taken over
1952                  */
1953                 if (ctdb->pnn != vnn->pnn) {
1954                         continue;
1955                 }
1956                 /* We only send out the updates if we need to */
1957                 if (!vnn->tcp_update_needed) {
1958                         continue;
1959                 }
1960                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
1961                                 TAKEOVER_TIMEOUT(),
1962                                 CTDB_BROADCAST_CONNECTED,
1963                                 &vnn->public_address,
1964                                 vnn->tcp_array);
1965                 if (ret != 0) {
1966                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
1967                                 ctdb_addr_to_str(&vnn->public_address)));
1968                 }
1969         }
1970
1971         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1972                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
1973                              ctdb_update_tcp_tickles, ctdb);
1974 }               
1975         
1976
1977 /*
1978   start periodic update of tcp tickles
1979  */
1980 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
1981 {
1982         ctdb->tickle_update_context = talloc_new(ctdb);
1983
1984         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1985                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
1986                              ctdb_update_tcp_tickles, ctdb);
1987 }
1988
1989
1990
1991
1992 struct control_gratious_arp {
1993         struct ctdb_context *ctdb;
1994         ctdb_sock_addr addr;
1995         const char *iface;
1996         int count;
1997 };
1998
1999 /*
2000   send a control_gratuitous arp
2001  */
2002 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
2003                                   struct timeval t, void *private_data)
2004 {
2005         int ret;
2006         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2007                                                         struct control_gratious_arp);
2008
2009         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2010         if (ret != 0) {
2011                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp failed (%s)\n", strerror(errno)));
2012         }
2013
2014
2015         arp->count++;
2016         if (arp->count == CTDB_ARP_REPEAT) {
2017                 talloc_free(arp);
2018                 return;
2019         }
2020
2021         event_add_timed(arp->ctdb->ev, arp, 
2022                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
2023                         send_gratious_arp, arp);
2024 }
2025
2026
2027 /*
2028   send a gratious arp 
2029  */
2030 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2031 {
2032         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2033         struct control_gratious_arp *arp;
2034
2035         /* verify the size of indata */
2036         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2037                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2038                                  (unsigned)indata.dsize, 
2039                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2040                 return -1;
2041         }
2042         if (indata.dsize != 
2043                 ( offsetof(struct ctdb_control_gratious_arp, iface)
2044                 + gratious_arp->len ) ){
2045
2046                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2047                         "but should be %u bytes\n", 
2048                          (unsigned)indata.dsize, 
2049                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2050                 return -1;
2051         }
2052
2053
2054         arp = talloc(ctdb, struct control_gratious_arp);
2055         CTDB_NO_MEMORY(ctdb, arp);
2056
2057         arp->ctdb  = ctdb;
2058         arp->addr   = gratious_arp->addr;
2059         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2060         CTDB_NO_MEMORY(ctdb, arp->iface);
2061         arp->count = 0;
2062         
2063         event_add_timed(arp->ctdb->ev, arp, 
2064                         timeval_zero(), send_gratious_arp, arp);
2065
2066         return 0;
2067 }
2068
2069 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2070 {
2071         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2072         int ret;
2073
2074         /* verify the size of indata */
2075         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2076                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2077                 return -1;
2078         }
2079         if (indata.dsize != 
2080                 ( offsetof(struct ctdb_control_ip_iface, iface)
2081                 + pub->len ) ){
2082
2083                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2084                         "but should be %u bytes\n", 
2085                          (unsigned)indata.dsize, 
2086                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2087                 return -1;
2088         }
2089
2090         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2091
2092         if (ret != 0) {
2093                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2094                 return -1;
2095         }
2096
2097         return 0;
2098 }
2099
/*
  completion callback for the releaseip event started by
  del_public_address.

  private_data is the temporary talloc context created for that event;
  it is released here.  The event status is not looked at.
 */
static void delete_ip_callback(struct ctdb_context *ctdb, int status,
                               void *private_data)
{
        void *event_ctx = private_data;

        talloc_free(event_ctx);
}
2108
2109 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2110 {
2111         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2112         struct ctdb_vnn *vnn;
2113         int ret;
2114
2115         /* verify the size of indata */
2116         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2117                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2118                 return -1;
2119         }
2120         if (indata.dsize != 
2121                 ( offsetof(struct ctdb_control_ip_iface, iface)
2122                 + pub->len ) ){
2123
2124                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2125                         "but should be %u bytes\n", 
2126                          (unsigned)indata.dsize, 
2127                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2128                 return -1;
2129         }
2130
2131         /* walk over all public addresses until we find a match */
2132         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2133                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2134                         TALLOC_CTX *mem_ctx = talloc_new(ctdb);
2135
2136                         DLIST_REMOVE(ctdb->vnn, vnn);
2137
2138                         ret = ctdb_event_script_callback(ctdb, 
2139                                          mem_ctx, delete_ip_callback, mem_ctx,
2140                                          false,
2141                                          CTDB_EVENT_RELEASE_IP,
2142                                          "%s %s %u",
2143                                          vnn->iface, 
2144                                          talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
2145                                          vnn->public_netmask_bits);
2146                         talloc_free(vnn);
2147                         if (ret != 0) {
2148                                 return -1;
2149                         }
2150                         return 0;
2151                 }
2152         }
2153
2154         return -1;
2155 }
2156
2157 /* This function is called from the recovery daemon to verify that a remote
2158    node has the expected ip allocation.
2159    This is verified against ctdb->ip_tree
2160 */
2161 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2162 {
2163         struct ctdb_public_ip_list *tmp_ip; 
2164         int i;
2165
2166         if (ctdb->ip_tree == NULL) {
2167                 /* dont know the expected allocation yet, assume remote node
2168                    is correct. */
2169                 return 0;
2170         }
2171
2172         if (ips == NULL) {
2173                 return 0;
2174         }
2175
2176         for (i=0; i<ips->num; i++) {
2177                 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2178                 if (tmp_ip == NULL) {
2179                         DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2180                         return -1;
2181                 }
2182
2183                 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2184                         continue;
2185                 }
2186
2187                 if (tmp_ip->pnn != ips->ips[i].pnn) {
2188                         DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
2189                         return -1;
2190                 }
2191         }
2192
2193         return 0;
2194 }