Change the loglevel of "registered tcp client for ..." to INFO
[sahlberg/ctdb.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/* Timeout value built from the takeover_timeout tunable by timeval_current_ofs()
   (presumably "that many seconds from now" -- confirm against the helper).
   Expands to a reference to a variable named `ctdb`, which must be in scope
   wherever the macro is used. */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/* First argument handed to timeval_current_ofs() when ctdb_control_send_arp()
   re-arms itself (presumably seconds -- confirm). */
#define CTDB_ARP_INTERVAL 1
/* Number of rounds ctdb_control_send_arp() runs before it frees its state
   instead of rescheduling. */
#define CTDB_ARP_REPEAT   3
35
/*
  state carried between rounds of ctdb_control_send_arp() for one public
  address that has just been taken over
 */
struct ctdb_takeover_arp {
        struct ctdb_context *ctdb;       /* daemon context; its ->ev is used to re-arm the timer */
        uint32_t count;                  /* rounds completed so far; compared with CTDB_ARP_REPEAT */
        ctdb_sock_addr addr;             /* address passed to ctdb_sys_send_arp() */
        struct ctdb_tcp_array *tcparray; /* connections to send tickle acks for; may be NULL */
        struct ctdb_vnn *vnn;            /* public address entry; supplies ->iface and ->takeover_ctx */
};
43
44
45 /*
46   lists of tcp endpoints
47  */
48 struct ctdb_tcp_list {
49         struct ctdb_tcp_list *prev, *next;
50         struct ctdb_tcp_connection connection;
51 };
52
53 /*
54   list of clients to kill on IP release
55  */
56 struct ctdb_client_ip {
57         struct ctdb_client_ip *prev, *next;
58         struct ctdb_context *ctdb;
59         ctdb_sock_addr addr;
60         uint32_t client_id;
61 };
62
63
64 /*
65   send a gratuitous arp
66  */
67 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
68                                   struct timeval t, void *private_data)
69 {
70         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
71                                                         struct ctdb_takeover_arp);
72         int i, ret;
73         struct ctdb_tcp_array *tcparray;
74
75         ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface);
76         if (ret != 0) {
77                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
78         }
79
80         tcparray = arp->tcparray;
81         if (tcparray) {
82                 for (i=0;i<tcparray->num;i++) {
83                         struct ctdb_tcp_connection *tcon;
84
85                         tcon = &tcparray->connections[i];
86                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
87                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
88                                 ctdb_addr_to_str(&tcon->src_addr),
89                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
90                         ret = ctdb_sys_send_tcp(
91                                 &tcon->src_addr, 
92                                 &tcon->dst_addr,
93                                 0, 0, 0);
94                         if (ret != 0) {
95                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
96                                         ctdb_addr_to_str(&tcon->src_addr)));
97                         }
98                 }
99         }
100
101         arp->count++;
102
103         if (arp->count == CTDB_ARP_REPEAT) {
104                 talloc_free(arp);
105                 return;
106         }
107
108         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
109                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
110                         ctdb_control_send_arp, arp);
111 }
112
/*
  state handed to takeover_ip_callback() / release_ip_callback() while the
  takeip / releaseip event script runs
 */
struct takeover_callback_state {
        struct ctdb_req_control *c;     /* control request that gets the asynchronous reply */
        ctdb_sock_addr *addr;           /* copy of the public address being taken or released */
        struct ctdb_vnn *vnn;           /* public address entry the request refers to */
};
118
119 /*
120   called when takeip event finishes
121  */
122 static void takeover_ip_callback(struct ctdb_context *ctdb, int status, 
123                                  void *private_data)
124 {
125         struct takeover_callback_state *state = 
126                 talloc_get_type(private_data, struct takeover_callback_state);
127         struct ctdb_takeover_arp *arp;
128         struct ctdb_tcp_array *tcparray;
129
130         if (status != 0) {
131                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
132                         ctdb_addr_to_str(state->addr),
133                         state->vnn->iface));
134                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
135                 talloc_free(state);
136                 return;
137         }
138
139         if (!state->vnn->takeover_ctx) {
140                 state->vnn->takeover_ctx = talloc_new(ctdb);
141                 if (!state->vnn->takeover_ctx) {
142                         goto failed;
143                 }
144         }
145
146         arp = talloc_zero(state->vnn->takeover_ctx, struct ctdb_takeover_arp);
147         if (!arp) goto failed;
148         
149         arp->ctdb = ctdb;
150         arp->addr = *state->addr;
151         arp->vnn  = state->vnn;
152
153         tcparray = state->vnn->tcp_array;
154         if (tcparray) {
155                 /* add all of the known tcp connections for this IP to the
156                    list of tcp connections to send tickle acks for */
157                 arp->tcparray = talloc_steal(arp, tcparray);
158
159                 state->vnn->tcp_array = NULL;
160                 state->vnn->tcp_update_needed = true;
161         }
162
163         event_add_timed(arp->ctdb->ev, state->vnn->takeover_ctx, 
164                         timeval_zero(), ctdb_control_send_arp, arp);
165
166         /* the control succeeded */
167         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
168         talloc_free(state);
169         return;
170
171 failed:
172         ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
173         talloc_free(state);
174         return;
175 }
176
177 /*
178   Find the vnn of the node that has a public ip address
179   returns -1 if the address is not known as a public address
180  */
181 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
182 {
183         struct ctdb_vnn *vnn;
184
185         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
186                 if (ctdb_same_ip(&vnn->public_address, addr)) {
187                         return vnn;
188                 }
189         }
190
191         return NULL;
192 }
193
194
195 /*
196   take over an ip address
197  */
198 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, 
199                                  struct ctdb_req_control *c,
200                                  TDB_DATA indata, 
201                                  bool *async_reply)
202 {
203         int ret;
204         struct takeover_callback_state *state;
205         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
206         struct ctdb_vnn *vnn;
207
208         /* update out vnn list */
209         vnn = find_public_ip_vnn(ctdb, &pip->addr);
210         if (vnn == NULL) {
211                 DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n", 
212                         ctdb_addr_to_str(&pip->addr)));
213                 return 0;
214         }
215         vnn->pnn = pip->pnn;
216
217         /* if our kernel already has this IP, do nothing */
218         if (ctdb_sys_have_ip(&pip->addr)) {
219                 return 0;
220         }
221
222         state = talloc(ctdb, struct takeover_callback_state);
223         CTDB_NO_MEMORY(ctdb, state);
224
225         state->c = talloc_steal(ctdb, c);
226         state->addr = talloc(ctdb, ctdb_sock_addr);
227         CTDB_NO_MEMORY(ctdb, state->addr);
228
229         *state->addr = pip->addr;
230         state->vnn   = vnn;
231
232         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n", 
233                 ctdb_addr_to_str(&pip->addr),
234                 vnn->public_netmask_bits, 
235                 vnn->iface));
236
237         ret = ctdb_event_script_callback(ctdb, 
238                                          timeval_current_ofs(ctdb->tunable.script_timeout, 0),
239                                          state, takeover_ip_callback, state,
240                                          "takeip %s %s %u",
241                                          vnn->iface, 
242                                          talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
243                                          vnn->public_netmask_bits);
244
245         if (ret != 0) {
246                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
247                         ctdb_addr_to_str(&pip->addr),
248                         vnn->iface));
249                 talloc_free(state);
250                 return -1;
251         }
252
253         /* tell ctdb_control.c that we will be replying asynchronously */
254         *async_reply = true;
255
256         return 0;
257 }
258
259 /*
260   takeover an ip address old v4 style
261  */
262 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
263                                 struct ctdb_req_control *c,
264                                 TDB_DATA indata, 
265                                 bool *async_reply)
266 {
267         TDB_DATA data;
268         
269         data.dsize = sizeof(struct ctdb_public_ip);
270         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
271         CTDB_NO_MEMORY(ctdb, data.dptr);
272         
273         memcpy(data.dptr, indata.dptr, indata.dsize);
274         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
275 }
276
277 /*
278   kill any clients that are registered with a IP that is being released
279  */
280 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
281 {
282         struct ctdb_client_ip *ip;
283
284         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
285                 ctdb_addr_to_str(addr)));
286
287         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
288                 ctdb_sock_addr tmp_addr;
289
290                 tmp_addr = ip->addr;
291                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
292                         ip->client_id,
293                         ctdb_addr_to_str(&ip->addr)));
294
295                 if (ctdb_same_ip(&tmp_addr, addr)) {
296                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
297                                                                      ip->client_id, 
298                                                                      struct ctdb_client);
299                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
300                                 ip->client_id,
301                                 ctdb_addr_to_str(&ip->addr),
302                                 client->pid));
303
304                         if (client->pid != 0) {
305                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
306                                         (unsigned)client->pid,
307                                         ctdb_addr_to_str(addr),
308                                         ip->client_id));
309                                 kill(client->pid, SIGKILL);
310                         }
311                 }
312         }
313 }
314
315 /*
316   called when releaseip event finishes
317  */
318 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
319                                 void *private_data)
320 {
321         struct takeover_callback_state *state = 
322                 talloc_get_type(private_data, struct takeover_callback_state);
323         TDB_DATA data;
324
325         /* send a message to all clients of this node telling them
326            that the cluster has been reconfigured and they should
327            release any sockets on this IP */
328         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
329         data.dsize = strlen((char *)data.dptr)+1;
330
331         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
332
333         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
334
335         /* kill clients that have registered with this IP */
336         release_kill_clients(ctdb, state->addr);
337         
338         /* the control succeeded */
339         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
340         talloc_free(state);
341 }
342
343 /*
344   release an ip address
345  */
346 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
347                                 struct ctdb_req_control *c,
348                                 TDB_DATA indata, 
349                                 bool *async_reply)
350 {
351         int ret;
352         struct takeover_callback_state *state;
353         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
354         struct ctdb_vnn *vnn;
355
356         /* update our vnn list */
357         vnn = find_public_ip_vnn(ctdb, &pip->addr);
358         if (vnn == NULL) {
359                 DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n",
360                         ctdb_addr_to_str(&pip->addr)));
361                 return 0;
362         }
363         vnn->pnn = pip->pnn;
364
365         /* stop any previous arps */
366         talloc_free(vnn->takeover_ctx);
367         vnn->takeover_ctx = NULL;
368
369         if (!ctdb_sys_have_ip(&pip->addr)) {
370                 DEBUG(DEBUG_INFO,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
371                         ctdb_addr_to_str(&pip->addr),
372                         vnn->public_netmask_bits, 
373                         vnn->iface));
374                 return 0;
375         }
376
377         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s\n", 
378                 ctdb_addr_to_str(&pip->addr),
379                 vnn->public_netmask_bits, 
380                 vnn->iface));
381
382         state = talloc(ctdb, struct takeover_callback_state);
383         CTDB_NO_MEMORY(ctdb, state);
384
385         state->c = talloc_steal(state, c);
386         state->addr = talloc(state, ctdb_sock_addr);       
387         CTDB_NO_MEMORY(ctdb, state->addr);
388         *state->addr = pip->addr;
389         state->vnn   = vnn;
390
391         ret = ctdb_event_script_callback(ctdb, 
392                                          timeval_current_ofs(ctdb->tunable.script_timeout, 0),
393                                          state, release_ip_callback, state,
394                                          "releaseip %s %s %u",
395                                          vnn->iface, 
396                                          talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
397                                          vnn->public_netmask_bits);
398         if (ret != 0) {
399                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
400                         ctdb_addr_to_str(&pip->addr),
401                         vnn->iface));
402                 talloc_free(state);
403                 return -1;
404         }
405
406         /* tell the control that we will be reply asynchronously */
407         *async_reply = true;
408         return 0;
409 }
410
411 /*
412   release an ip address old v4 style
413  */
414 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
415                                 struct ctdb_req_control *c,
416                                 TDB_DATA indata, 
417                                 bool *async_reply)
418 {
419         TDB_DATA data;
420         
421         data.dsize = sizeof(struct ctdb_public_ip);
422         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
423         CTDB_NO_MEMORY(ctdb, data.dptr);
424         
425         memcpy(data.dptr, indata.dptr, indata.dsize);
426         return ctdb_control_release_ip(ctdb, c, data, async_reply);
427 }
428
429
430 static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface)
431 {
432         struct ctdb_vnn      *vnn;
433
434         /* Verify that we dont have an entry for this ip yet */
435         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
436                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
437                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
438                                 ctdb_addr_to_str(addr)));
439                         return -1;
440                 }               
441         }
442
443         /* create a new vnn structure for this ip address */
444         vnn = talloc_zero(ctdb, struct ctdb_vnn);
445         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
446         vnn->iface = talloc_strdup(vnn, iface);
447         vnn->public_address      = *addr;
448         vnn->public_netmask_bits = mask;
449         vnn->pnn                 = -1;
450         
451         DLIST_ADD(ctdb->vnn, vnn);
452
453         return 0;
454 }
455
456
457 /*
458   setup the event script directory
459 */
460 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
461 {
462         ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
463         CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
464         return 0;
465 }
466
467 /*
468   setup the public address lists from a file
469 */
470 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
471 {
472         char **lines;
473         int nlines;
474         int i;
475
476         lines = file_lines_load(alist, &nlines, ctdb);
477         if (lines == NULL) {
478                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
479                 return -1;
480         }
481         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
482                 nlines--;
483         }
484
485         for (i=0;i<nlines;i++) {
486                 unsigned mask;
487                 ctdb_sock_addr addr;
488                 const char *addrstr;
489                 const char *iface;
490                 char *tok, *line;
491
492                 line = lines[i];
493                 while ((*line == ' ') || (*line == '\t')) {
494                         line++;
495                 }
496                 if (*line == '#') {
497                         continue;
498                 }
499                 if (strcmp(line, "") == 0) {
500                         continue;
501                 }
502                 tok = strtok(line, " \t");
503                 addrstr = tok;
504                 tok = strtok(NULL, " \t");
505                 if (tok == NULL) {
506                         if (NULL == ctdb->default_public_interface) {
507                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
508                                          i+1));
509                                 talloc_free(lines);
510                                 return -1;
511                         }
512                         iface = ctdb->default_public_interface;
513                 } else {
514                         iface = tok;
515                 }
516
517                 if (!addrstr || !parse_ip_mask(addrstr, iface, &addr, &mask)) {
518                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
519                         talloc_free(lines);
520                         return -1;
521                 }
522                 if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
523                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
524                         talloc_free(lines);
525                         return -1;
526                 }
527         }
528
529         talloc_free(lines);
530         return 0;
531 }
532
533
534
535
/*
  singly linked list of public addresses, used to build the merged
  cluster-wide list of addresses and their assigned nodes
 */
struct ctdb_public_ip_list {
        struct ctdb_public_ip_list *next;
        uint32_t pnn;           /* node assigned to serve addr; the code stores and tests -1 here for "unassigned" */
        ctdb_sock_addr addr;    /* the public address */
};
541
542
543 /* Given a physical node, return the number of
544    public addresses that is currently assigned to this node.
545 */
546 static int node_ip_coverage(struct ctdb_context *ctdb, 
547         int32_t pnn,
548         struct ctdb_public_ip_list *ips)
549 {
550         int num=0;
551
552         for (;ips;ips=ips->next) {
553                 if (ips->pnn == pnn) {
554                         num++;
555                 }
556         }
557         return num;
558 }
559
560
561 /* Check if this is a public ip known to the node, i.e. can that
562    node takeover this ip ?
563 */
564 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
565                 struct ctdb_public_ip_list *ip)
566 {
567         struct ctdb_all_public_ips *public_ips;
568         int i;
569
570         public_ips = ctdb->nodes[pnn]->public_ips;
571
572         if (public_ips == NULL) {
573                 return -1;
574         }
575
576         for (i=0;i<public_ips->num;i++) {
577                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
578                         /* yes, this node can serve this public ip */
579                         return 0;
580                 }
581         }
582
583         return -1;
584 }
585
586
587 /* search the node lists list for a node to takeover this ip.
588    pick the node that currently are serving the least number of ips
589    so that the ips get spread out evenly.
590 */
591 static int find_takeover_node(struct ctdb_context *ctdb, 
592                 struct ctdb_node_map *nodemap, uint32_t mask, 
593                 struct ctdb_public_ip_list *ip,
594                 struct ctdb_public_ip_list *all_ips)
595 {
596         int pnn, min=0, num;
597         int i;
598
599         pnn    = -1;
600         for (i=0;i<nodemap->num;i++) {
601                 if (nodemap->nodes[i].flags & mask) {
602                         /* This node is not healty and can not be used to serve
603                            a public address 
604                         */
605                         continue;
606                 }
607
608                 /* verify that this node can serve this ip */
609                 if (can_node_serve_ip(ctdb, i, ip)) {
610                         /* no it couldnt   so skip to the next node */
611                         continue;
612                 }
613
614                 num = node_ip_coverage(ctdb, i, all_ips);
615                 /* was this the first node we checked ? */
616                 if (pnn == -1) {
617                         pnn = i;
618                         min  = num;
619                 } else {
620                         if (num < min) {
621                                 pnn = i;
622                                 min  = num;
623                         }
624                 }
625         }       
626         if (pnn == -1) {
627                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
628                         ctdb_addr_to_str(&ip->addr)));
629
630                 return -1;
631         }
632
633         ip->pnn = pnn;
634         return 0;
635 }
636
637 struct ctdb_public_ip_list *
638 add_ip_to_merged_list(struct ctdb_context *ctdb,
639                         TALLOC_CTX *tmp_ctx, 
640                         struct ctdb_public_ip_list *ip_list, 
641                         struct ctdb_public_ip *ip)
642 {
643         struct ctdb_public_ip_list *tmp_ip; 
644
645         /* do we already have this ip in our merged list ?*/
646         for (tmp_ip=ip_list;tmp_ip;tmp_ip=tmp_ip->next) {
647
648                 /* we already have this public ip in the list */
649                 if (ctdb_same_ip(&tmp_ip->addr, &ip->addr)) {
650                         return ip_list;
651                 }
652         }
653
654         /* this is a new public ip, we must add it to the list */
655         tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
656         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
657         tmp_ip->pnn  = ip->pnn;
658         tmp_ip->addr = ip->addr;
659         tmp_ip->next = ip_list;
660
661         return tmp_ip;
662 }
663
664 struct ctdb_public_ip_list *
665 create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
666 {
667         int i, j;
668         struct ctdb_public_ip_list *ip_list = NULL;
669         struct ctdb_all_public_ips *public_ips;
670
671         for (i=0;i<ctdb->num_nodes;i++) {
672                 public_ips = ctdb->nodes[i]->public_ips;
673
674                 /* there were no public ips for this node */
675                 if (public_ips == NULL) {
676                         continue;
677                 }               
678
679                 for (j=0;j<public_ips->num;j++) {
680                         ip_list = add_ip_to_merged_list(ctdb, tmp_ctx,
681                                         ip_list, &public_ips->ips[j]);
682                 }
683         }
684
685         return ip_list;
686 }
687
688 /*
689   make any IP alias changes for public addresses that are necessary 
690  */
691 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
692 {
693         int i, num_healthy, retries;
694         struct ctdb_public_ip ip;
695         struct ctdb_public_ipv4 ipv4;
696         uint32_t mask;
697         struct ctdb_public_ip_list *all_ips, *tmp_ip;
698         int maxnode, maxnum=0, minnode, minnum=0, num;
699         TDB_DATA data;
700         struct timeval timeout;
701         struct client_async_data *async_data;
702         struct ctdb_client_control_state *state;
703         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
704
705
706         ZERO_STRUCT(ip);
707
708         /* Count how many completely healthy nodes we have */
709         num_healthy = 0;
710         for (i=0;i<nodemap->num;i++) {
711                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
712                         num_healthy++;
713                 }
714         }
715
716         if (num_healthy > 0) {
717                 /* We have healthy nodes, so only consider them for 
718                    serving public addresses
719                 */
720                 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
721         } else {
722                 /* We didnt have any completely healthy nodes so
723                    use "disabled" nodes as a fallback
724                 */
725                 mask = NODE_FLAGS_INACTIVE;
726         }
727
728         /* since nodes only know about those public addresses that
729            can be served by that particular node, no single node has
730            a full list of all public addresses that exist in the cluster.
731            Walk over all node structures and create a merged list of
732            all public addresses that exist in the cluster.
733         */
734         all_ips = create_merged_ip_list(ctdb, tmp_ctx);
735
736         /* If we want deterministic ip allocations, i.e. that the ip addresses
737            will always be allocated the same way for a specific set of
738            available/unavailable nodes.
739         */
740         if (1 == ctdb->tunable.deterministic_public_ips) {              
741                 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
742                 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
743                         tmp_ip->pnn = i%nodemap->num;
744                 }
745         }
746
747
748         /* mark all public addresses with a masked node as being served by
749            node -1
750         */
751         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
752                 if (tmp_ip->pnn == -1) {
753                         continue;
754                 }
755                 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
756                         tmp_ip->pnn = -1;
757                 }
758         }
759
760         /* verify that the assigned nodes can serve that public ip
761            and set it to -1 if not
762         */
763         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
764                 if (tmp_ip->pnn == -1) {
765                         continue;
766                 }
767                 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
768                         /* this node can not serve this ip. */
769                         tmp_ip->pnn = -1;
770                 }
771         }
772
773
774         /* now we must redistribute all public addresses with takeover node
775            -1 among the nodes available
776         */
777         retries = 0;
778 try_again:
779         /* loop over all ip's and find a physical node to cover for 
780            each unassigned ip.
781         */
782         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
783                 if (tmp_ip->pnn == -1) {
784                         if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
785                                 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
786                                         ctdb_addr_to_str(&tmp_ip->addr)));
787                         }
788                 }
789         }
790
791         /* If we dont want ips to fail back after a node becomes healthy
792            again, we wont even try to reallocat the ip addresses so that
793            they are evenly spread out.
794            This can NOT be used at the same time as DeterministicIPs !
795         */
796         if (1 == ctdb->tunable.no_ip_failback) {
797                 if (1 == ctdb->tunable.deterministic_public_ips) {
798                         DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
799                 }
800                 goto finished;
801         }
802
803
804         /* now, try to make sure the ip adresses are evenly distributed
805            across the node.
806            for each ip address, loop over all nodes that can serve this
807            ip and make sure that the difference between the node
808            serving the most and the node serving the least ip's are not greater
809            than 1.
810         */
811         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
812                 if (tmp_ip->pnn == -1) {
813                         continue;
814                 }
815
816                 /* Get the highest and lowest number of ips's served by any 
817                    valid node which can serve this ip.
818                 */
819                 maxnode = -1;
820                 minnode = -1;
821                 for (i=0;i<nodemap->num;i++) {
822                         if (nodemap->nodes[i].flags & mask) {
823                                 continue;
824                         }
825
826                         /* only check nodes that can actually serve this ip */
827                         if (can_node_serve_ip(ctdb, i, tmp_ip)) {
828                                 /* no it couldnt   so skip to the next node */
829                                 continue;
830                         }
831
832                         num = node_ip_coverage(ctdb, i, all_ips);
833                         if (maxnode == -1) {
834                                 maxnode = i;
835                                 maxnum  = num;
836                         } else {
837                                 if (num > maxnum) {
838                                         maxnode = i;
839                                         maxnum  = num;
840                                 }
841                         }
842                         if (minnode == -1) {
843                                 minnode = i;
844                                 minnum  = num;
845                         } else {
846                                 if (num < minnum) {
847                                         minnode = i;
848                                         minnum  = num;
849                                 }
850                         }
851                 }
852                 if (maxnode == -1) {
853                         DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
854                                 ctdb_addr_to_str(&tmp_ip->addr)));
855
856                         continue;
857                 }
858
859                 /* If we want deterministic IPs then dont try to reallocate 
860                    them to spread out the load.
861                 */
862                 if (1 == ctdb->tunable.deterministic_public_ips) {
863                         continue;
864                 }
865
866                 /* if the spread between the smallest and largest coverage by
867                    a node is >=2 we steal one of the ips from the node with
868                    most coverage to even things out a bit.
869                    try to do this at most 5 times  since we dont want to spend
870                    too much time balancing the ip coverage.
871                 */
872                 if ( (maxnum > minnum+1)
873                   && (retries < 5) ){
874                         struct ctdb_public_ip_list *tmp;
875
876                         /* mark one of maxnode's vnn's as unassigned and try
877                            again
878                         */
879                         for (tmp=all_ips;tmp;tmp=tmp->next) {
880                                 if (tmp->pnn == maxnode) {
881                                         tmp->pnn = -1;
882                                         retries++;
883                                         goto try_again;
884                                 }
885                         }
886                 }
887         }
888
889
890         /* finished distributing the public addresses, now just send the 
891            info out to the nodes
892         */
893 finished:
894
895         /* at this point ->pnn is the node which will own each IP
896            or -1 if there is no node that can cover this ip
897         */
898
899         /* now tell all nodes to delete any alias that they should not
900            have.  This will be a NOOP on nodes that don't currently
901            hold the given alias */
902         async_data = talloc_zero(tmp_ctx, struct client_async_data);
903         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
904
905         for (i=0;i<nodemap->num;i++) {
906                 /* don't talk to unconnected nodes, but do talk to banned nodes */
907                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
908                         continue;
909                 }
910
911                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
912                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
913                                 /* This node should be serving this
914                                    vnn so dont tell it to release the ip
915                                 */
916                                 continue;
917                         }
918                         if (tmp_ip->addr.sa.sa_family == AF_INET) {
919                                 ipv4.pnn = tmp_ip->pnn;
920                                 ipv4.sin = tmp_ip->addr.ip;
921
922                                 timeout = TAKEOVER_TIMEOUT();
923                                 data.dsize = sizeof(ipv4);
924                                 data.dptr  = (uint8_t *)&ipv4;
925                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
926                                                 0, CTDB_CONTROL_RELEASE_IPv4, 0,
927                                                 data, async_data,
928                                                 &timeout, NULL);
929                         } else {
930                                 ip.pnn  = tmp_ip->pnn;
931                                 ip.addr = tmp_ip->addr;
932
933                                 timeout = TAKEOVER_TIMEOUT();
934                                 data.dsize = sizeof(ip);
935                                 data.dptr  = (uint8_t *)&ip;
936                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
937                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
938                                                 data, async_data,
939                                                 &timeout, NULL);
940                         }
941
942                         if (state == NULL) {
943                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
944                                 talloc_free(tmp_ctx);
945                                 return -1;
946                         }
947                 
948                         ctdb_client_async_add(async_data, state);
949                 }
950         }
951         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
952                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
953                 talloc_free(tmp_ctx);
954                 return -1;
955         }
956         talloc_free(async_data);
957
958
959         /* tell all nodes to get their own IPs */
960         async_data = talloc_zero(tmp_ctx, struct client_async_data);
961         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
962         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
963                 if (tmp_ip->pnn == -1) {
964                         /* this IP won't be taken over */
965                         continue;
966                 }
967
968                 if (tmp_ip->addr.sa.sa_family == AF_INET) {
969                         ipv4.pnn = tmp_ip->pnn;
970                         ipv4.sin = tmp_ip->addr.ip;
971
972                         timeout = TAKEOVER_TIMEOUT();
973                         data.dsize = sizeof(ipv4);
974                         data.dptr  = (uint8_t *)&ipv4;
975                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
976                                         0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
977                                         data, async_data,
978                                         &timeout, NULL);
979                 } else {
980                         ip.pnn  = tmp_ip->pnn;
981                         ip.addr = tmp_ip->addr;
982
983                         timeout = TAKEOVER_TIMEOUT();
984                         data.dsize = sizeof(ip);
985                         data.dptr  = (uint8_t *)&ip;
986                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
987                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
988                                         data, async_data,
989                                         &timeout, NULL);
990                 }
991                 if (state == NULL) {
992                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
993                         talloc_free(tmp_ctx);
994                         return -1;
995                 }
996                 
997                 ctdb_client_async_add(async_data, state);
998         }
999         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1000                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1001                 talloc_free(tmp_ctx);
1002                 return -1;
1003         }
1004
1005         talloc_free(tmp_ctx);
1006         return 0;
1007 }
1008
1009
1010 /*
1011   destroy a ctdb_client_ip structure
1012  */
1013 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1014 {
1015         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1016                 ctdb_addr_to_str(&ip->addr),
1017                 ntohs(ip->addr.ip.sin_port),
1018                 ip->client_id));
1019
1020         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1021         return 0;
1022 }
1023
1024 /*
1025   called by a client to inform us of a TCP connection that it is managing
1026   that should tickled with an ACK when IP takeover is done
1027   we handle both the old ipv4 style of packets as well as the new ipv4/6
1028   pdus.
1029  */
1030 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1031                                 TDB_DATA indata)
1032 {
1033         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1034         struct ctdb_control_tcp *old_addr = NULL;
1035         struct ctdb_control_tcp_addr new_addr;
1036         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1037         struct ctdb_tcp_list *tcp;
1038         struct ctdb_control_tcp_vnn t;
1039         int ret;
1040         TDB_DATA data;
1041         struct ctdb_client_ip *ip;
1042         struct ctdb_vnn *vnn;
1043         ctdb_sock_addr addr;
1044
1045         switch (indata.dsize) {
1046         case sizeof(struct ctdb_control_tcp):
1047                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1048                 ZERO_STRUCT(new_addr);
1049                 tcp_sock = &new_addr;
1050                 tcp_sock->src.ip  = old_addr->src;
1051                 tcp_sock->dest.ip = old_addr->dest;
1052                 break;
1053         case sizeof(struct ctdb_control_tcp_addr):
1054                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1055                 break;
1056         default:
1057                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed to ctdb_control_tcp_client. size was %d but only allowed sizes are %lu and %lu\n", (int)indata.dsize, sizeof(struct ctdb_control_tcp), sizeof(struct ctdb_control_tcp_addr)));
1058                 return -1;
1059         }
1060
1061         addr = tcp_sock->src;
1062         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1063         addr = tcp_sock->dest;
1064         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1065
1066         ZERO_STRUCT(addr);
1067         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1068         vnn = find_public_ip_vnn(ctdb, &addr);
1069         if (vnn == NULL) {
1070                 switch (addr.sa.sa_family) {
1071                 case AF_INET:
1072                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1073                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1074                                         ctdb_addr_to_str(&addr)));
1075                         }
1076                         break;
1077                 case AF_INET6:
1078                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1079                                 ctdb_addr_to_str(&addr)));
1080                         break;
1081                 default:
1082                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1083                 }
1084
1085                 return 0;
1086         }
1087
1088         if (vnn->pnn != ctdb->pnn) {
1089                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1090                         ctdb_addr_to_str(&addr),
1091                         client_id, client->pid));
1092                 /* failing this call will tell smbd to die */
1093                 return -1;
1094         }
1095
1096         ip = talloc(client, struct ctdb_client_ip);
1097         CTDB_NO_MEMORY(ctdb, ip);
1098
1099         ip->ctdb      = ctdb;
1100         ip->addr      = addr;
1101         ip->client_id = client_id;
1102         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1103         DLIST_ADD(ctdb->client_ip_list, ip);
1104
1105         tcp = talloc(client, struct ctdb_tcp_list);
1106         CTDB_NO_MEMORY(ctdb, tcp);
1107
1108         tcp->connection.src_addr = tcp_sock->src;
1109         tcp->connection.dst_addr = tcp_sock->dest;
1110
1111         DLIST_ADD(client->tcp_list, tcp);
1112
1113         t.src  = tcp_sock->src;
1114         t.dest = tcp_sock->dest;
1115
1116         data.dptr = (uint8_t *)&t;
1117         data.dsize = sizeof(t);
1118
1119         switch (addr.sa.sa_family) {
1120         case AF_INET:
1121                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1122                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1123                         ctdb_addr_to_str(&tcp_sock->src),
1124                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1125                 break;
1126         case AF_INET6:
1127                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1128                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1129                         ctdb_addr_to_str(&tcp_sock->src),
1130                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1131                 break;
1132         default:
1133                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1134         }
1135
1136
1137         /* tell all nodes about this tcp connection */
1138         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1139                                        CTDB_CONTROL_TCP_ADD,
1140                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1141         if (ret != 0) {
1142                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1143                 return -1;
1144         }
1145
1146         return 0;
1147 }
1148
1149 /*
1150   find a tcp address on a list
1151  */
1152 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1153                                            struct ctdb_tcp_connection *tcp)
1154 {
1155         int i;
1156
1157         if (array == NULL) {
1158                 return NULL;
1159         }
1160
1161         for (i=0;i<array->num;i++) {
1162                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1163                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1164                         return &array->connections[i];
1165                 }
1166         }
1167         return NULL;
1168 }
1169
1170 /*
1171   called by a daemon to inform us of a TCP connection that one of its
1172   clients managing that should tickled with an ACK when IP takeover is
1173   done
1174  */
1175 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1176 {
1177         struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1178         struct ctdb_tcp_array *tcparray;
1179         struct ctdb_tcp_connection tcp;
1180         struct ctdb_vnn *vnn;
1181
1182         vnn = find_public_ip_vnn(ctdb, &p->dest);
1183         if (vnn == NULL) {
1184                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1185                         ctdb_addr_to_str(&p->dest)));
1186
1187                 return -1;
1188         }
1189
1190
1191         tcparray = vnn->tcp_array;
1192
1193         /* If this is the first tickle */
1194         if (tcparray == NULL) {
1195                 tcparray = talloc_size(ctdb->nodes, 
1196                         offsetof(struct ctdb_tcp_array, connections) +
1197                         sizeof(struct ctdb_tcp_connection) * 1);
1198                 CTDB_NO_MEMORY(ctdb, tcparray);
1199                 vnn->tcp_array = tcparray;
1200
1201                 tcparray->num = 0;
1202                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1203                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1204
1205                 tcparray->connections[tcparray->num].src_addr = p->src;
1206                 tcparray->connections[tcparray->num].dst_addr = p->dest;
1207                 tcparray->num++;
1208                 return 0;
1209         }
1210
1211
1212         /* Do we already have this tickle ?*/
1213         tcp.src_addr = p->src;
1214         tcp.dst_addr = p->dest;
1215         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1216                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1217                         ctdb_addr_to_str(&tcp.dst_addr),
1218                         ntohs(tcp.dst_addr.ip.sin_port),
1219                         vnn->pnn));
1220                 return 0;
1221         }
1222
1223         /* A new tickle, we must add it to the array */
1224         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1225                                         struct ctdb_tcp_connection,
1226                                         tcparray->num+1);
1227         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1228
1229         vnn->tcp_array = tcparray;
1230         tcparray->connections[tcparray->num].src_addr = p->src;
1231         tcparray->connections[tcparray->num].dst_addr = p->dest;
1232         tcparray->num++;
1233                                 
1234         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1235                 ctdb_addr_to_str(&tcp.dst_addr),
1236                 ntohs(tcp.dst_addr.ip.sin_port),
1237                 vnn->pnn));
1238
1239         return 0;
1240 }
1241
1242
1243 /*
1244   called by a daemon to inform us of a TCP connection that one of its
1245   clients managing that should tickled with an ACK when IP takeover is
1246   done
1247  */
1248 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1249 {
1250         struct ctdb_tcp_connection *tcpp;
1251         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1252
1253         if (vnn == NULL) {
1254                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1255                         ctdb_addr_to_str(&conn->dst_addr)));
1256                 return;
1257         }
1258
1259         /* if the array is empty we cant remove it
1260            and we dont need to do anything
1261          */
1262         if (vnn->tcp_array == NULL) {
1263                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1264                         ctdb_addr_to_str(&conn->dst_addr),
1265                         ntohs(conn->dst_addr.ip.sin_port)));
1266                 return;
1267         }
1268
1269
1270         /* See if we know this connection
1271            if we dont know this connection  then we dont need to do anything
1272          */
1273         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1274         if (tcpp == NULL) {
1275                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1276                         ctdb_addr_to_str(&conn->dst_addr),
1277                         ntohs(conn->dst_addr.ip.sin_port)));
1278                 return;
1279         }
1280
1281
1282         /* We need to remove this entry from the array.
1283            Instead of allocating a new array and copying data to it
1284            we cheat and just copy the last entry in the existing array
1285            to the entry that is to be removed and just shring the 
1286            ->num field
1287          */
1288         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1289         vnn->tcp_array->num--;
1290
1291         /* If we deleted the last entry we also need to remove the entire array
1292          */
1293         if (vnn->tcp_array->num == 0) {
1294                 talloc_free(vnn->tcp_array);
1295                 vnn->tcp_array = NULL;
1296         }               
1297
1298         vnn->tcp_update_needed = true;
1299
1300         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1301                 ctdb_addr_to_str(&conn->src_addr),
1302                 ntohs(conn->src_addr.ip.sin_port)));
1303 }
1304
1305
1306 /*
1307   called when a daemon restarts - the intent is to send all tickles for all
1308   public addresses we are serving immediately to the new node.  this is not
1309   implemented yet: both arguments are ignored and 0 is always returned. */
1310 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
1311 {
1312 /* XXX here we should send all tickles we are serving to the new node */
1313         return 0;
1314 }
1315
1316
1317 /*
1318   called when a client structure goes away - hook to remove
1319   elements from the tcp_list in all daemons
1320  */
1321 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1322 {
1323         while (client->tcp_list) {
1324                 struct ctdb_tcp_list *tcp = client->tcp_list;
1325                 DLIST_REMOVE(client->tcp_list, tcp);
1326                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1327         }
1328 }
1329
1330
1331 /*
1332   release all IPs on shutdown
1333  */
1334 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1335 {
1336         struct ctdb_vnn *vnn;
1337
1338         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1339                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1340                         continue;
1341                 }
1342                 if (vnn->pnn == ctdb->pnn) {
1343                         vnn->pnn = -1;
1344                 }
1345                 ctdb_event_script(ctdb, "releaseip %s %s %u",
1346                                   vnn->iface, 
1347                                   talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
1348                                   vnn->public_netmask_bits);
1349                 release_kill_clients(ctdb, &vnn->public_address);
1350         }
1351 }
1352
1353
1354 /*
1355   get list of public IPs
1356  */
1357 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1358                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1359 {
1360         int i, num, len;
1361         struct ctdb_all_public_ips *ips;
1362         struct ctdb_vnn *vnn;
1363
1364         /* count how many public ip structures we have */
1365         num = 0;
1366         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1367                 num++;
1368         }
1369
1370         len = offsetof(struct ctdb_all_public_ips, ips) + 
1371                 num*sizeof(struct ctdb_public_ip);
1372         ips = talloc_zero_size(outdata, len);
1373         CTDB_NO_MEMORY(ctdb, ips);
1374
1375         outdata->dsize = len;
1376         outdata->dptr  = (uint8_t *)ips;
1377
1378         ips->num = num;
1379         i = 0;
1380         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1381                 ips->ips[i].pnn  = vnn->pnn;
1382                 ips->ips[i].addr = vnn->public_address;
1383                 i++;
1384         }
1385
1386         return 0;
1387 }
1388
1389
1390 /*
1391   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
1392  */
1393 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
1394                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1395 {
1396         int i, num, len;
1397         struct ctdb_all_public_ipsv4 *ips;
1398         struct ctdb_vnn *vnn;
1399
1400         /* count how many public ip structures we have */
1401         num = 0;
1402         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1403                 if (vnn->public_address.sa.sa_family != AF_INET) {
1404                         continue;
1405                 }
1406                 num++;
1407         }
1408
1409         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
1410                 num*sizeof(struct ctdb_public_ipv4);
1411         ips = talloc_zero_size(outdata, len);
1412         CTDB_NO_MEMORY(ctdb, ips);
1413
1414         outdata->dsize = len;
1415         outdata->dptr  = (uint8_t *)ips;
1416
1417         ips->num = num;
1418         i = 0;
1419         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1420                 if (vnn->public_address.sa.sa_family != AF_INET) {
1421                         continue;
1422                 }
1423                 ips->ips[i].pnn = vnn->pnn;
1424                 ips->ips[i].sin = vnn->public_address.ip;
1425                 i++;
1426         }
1427
1428         return 0;
1429 }
1430
1431
1432 /*
1433    structure containing the capture socket and the set of tcp connections
1434    that the ctdb daemon is to kill.  it hangs off vnn->killtcp.
1435 */
1436 struct ctdb_kill_tcp {
1437         struct ctdb_vnn *vnn;           /* vnn whose ->killtcp refers to this structure; reset to NULL by the destructor */
1438         struct ctdb_context *ctdb;      /* daemon context; its event context is used to re-arm the tickle timer */
1439         int capture_fd;                 /* socket the tcp packets are read from; -1 until it has been set up */
1440         struct fd_event *fde;           /* NOTE(review): presumably the fd event that drives capture_tcp_handler - confirm */
1441         trbt_tree_t *connections;       /* tree of struct ctdb_killtcp_con, keyed by killtcp_key(src, dst) */
1442         void *private_data;             /* opaque pointer passed to ctdb_sys_read_tcp_packet() along with capture_fd */
1443 };
1444
1445 /*
1446   a tcp connection that is to be killed (one entry of the killtcp tree)
1447  */
1448 struct ctdb_killtcp_con {
1449         ctdb_sock_addr src_addr;        /* one endpoint; tickles and the reset are sent from dst_addr to src_addr */
1450         ctdb_sock_addr dst_addr;        /* the other endpoint of the connection */
1451         int count;                      /* tickles sent so far; the entry is freed once this has reached 5 */
1452         struct ctdb_kill_tcp *killtcp;  /* NOTE(review): presumably the owning ctdb_kill_tcp - not dereferenced in the code shown here */
1453 };
1454
1455 /* this function is used to create a key to represent this socketpair
1456    in the killtcp tree.
1457    this key is used to insert and lookup matching socketpairs that are
1458    to be tickled and RST
1459 */
1460 #define KILLTCP_KEYLEN  10
1461 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
1462 {
1463         static uint32_t key[KILLTCP_KEYLEN];
1464
1465         bzero(key, sizeof(key));
1466
1467         if (src->sa.sa_family != dst->sa.sa_family) {
1468                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
1469                 return key;
1470         }
1471         
1472         switch (src->sa.sa_family) {
1473         case AF_INET:
1474                 key[0]  = dst->ip.sin_addr.s_addr;
1475                 key[1]  = src->ip.sin_addr.s_addr;
1476                 key[2]  = dst->ip.sin_port;
1477                 key[3]  = src->ip.sin_port;
1478                 break;
1479         case AF_INET6:
1480                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
1481                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
1482                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
1483                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
1484                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
1485                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
1486                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
1487                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
1488                 key[8]  = dst->ip6.sin6_port;
1489                 key[9]  = src->ip6.sin6_port;
1490                 break;
1491         default:
1492                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
1493                 return key;
1494         }
1495
1496         return key;
1497 }
1498
1499 /*
1500   called when we get a read event on the raw socket
1501  */
1502 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
1503                                 uint16_t flags, void *private_data)
1504 {
1505         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1506         struct ctdb_killtcp_con *con;
1507         ctdb_sock_addr src, dst;
1508         uint32_t ack_seq, seq;
1509
1510         if (!(flags & EVENT_FD_READ)) {
1511                 return;
1512         }
1513
1514         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
1515                                 killtcp->private_data,
1516                                 &src, &dst,
1517                                 &ack_seq, &seq) != 0) {
1518                 /* probably a non-tcp ACK packet */
1519                 return;
1520         }
1521
1522         /* check if we have this guy in our list of connections
1523            to kill
1524         */
1525         con = trbt_lookuparray32(killtcp->connections, 
1526                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
1527         if (con == NULL) {
1528                 /* no this was some other packet we can just ignore */
1529                 return;
1530         }
1531
1532         /* This one has been tickled !
1533            now reset him and remove him from the list.
1534          */
1535         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
1536                 ntohs(con->dst_addr.ip.sin_port),
1537                 ctdb_addr_to_str(&con->src_addr),
1538                 ntohs(con->src_addr.ip.sin_port)));
1539
1540         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
1541         talloc_free(con);
1542 }
1543
1544
1545 /* when traversing the list of all tcp connections to send tickle acks to
1546    (so that we can capture the ack coming back and kill the connection
1547     by a RST)
1548    this callback is called for each connection we are currently trying to kill
1549 */
1550 static void tickle_connection_traverse(void *param, void *data)
1551 {
1552         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
1553
1554         /* have tried too many times, just give up */
1555         if (con->count >= 5) {
1556                 talloc_free(con);
1557                 return;
1558         }
1559
1560         /* othervise, try tickling it again */
1561         con->count++;
1562         ctdb_sys_send_tcp(
1563                 (ctdb_sock_addr *)&con->dst_addr,
1564                 (ctdb_sock_addr *)&con->src_addr,
1565                 0, 0, 0);
1566 }
1567
1568
1569 /* 
1570    called every second until all sentenced connections have been reset
1571  */
1572 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
1573                                               struct timeval t, void *private_data)
1574 {
1575         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1576
1577
1578         /* loop over all connections sending tickle ACKs */
1579         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
1580
1581
1582         /* If there are no more connections to kill we can remove the
1583            entire killtcp structure
1584          */
1585         if ( (killtcp->connections == NULL) || 
1586              (killtcp->connections->root == NULL) ) {
1587                 talloc_free(killtcp);
1588                 return;
1589         }
1590
1591         /* try tickling them again in a seconds time
1592          */
1593         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
1594                         ctdb_tickle_sentenced_connections, killtcp);
1595 }
1596
1597 /*
1598   destroy the killtcp structure
1599  */
1600 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
1601 {
1602         killtcp->vnn->killtcp = NULL;
1603         return 0;
1604 }
1605
1606
1607 /* nothing fancy here, just unconditionally replace any existing
1608    connection structure with the new one: parm (the new data) is
1609    returned as is and data (the existing entry, if any) is ignored.
1610    don't even free the old one if it did exist, that one is talloc_stolen
1611    by the same node in the tree anyway and will be deleted when the new data
1612    is deleted
1613 */
1614 static void *add_killtcp_callback(void *parm, void *data)
1615 {
1616         return parm;
1617 }
1618
1619 /*
1620   add a tcp socket to the list of connections we want to RST
1621  */
1622 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
1623                                        ctdb_sock_addr *s,
1624                                        ctdb_sock_addr *d)
1625 {
1626         ctdb_sock_addr src, dst;
1627         struct ctdb_kill_tcp *killtcp;
1628         struct ctdb_killtcp_con *con;
1629         struct ctdb_vnn *vnn;
1630
1631         ctdb_canonicalize_ip(s, &src);
1632         ctdb_canonicalize_ip(d, &dst);
1633
1634         vnn = find_public_ip_vnn(ctdb, &dst);
1635         if (vnn == NULL) {
1636                 vnn = find_public_ip_vnn(ctdb, &src);
1637         }
1638         if (vnn == NULL) {
1639                 /* if it is not a public ip   it could be our 'single ip' */
1640                 if (ctdb->single_ip_vnn) {
1641                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
1642                                 vnn = ctdb->single_ip_vnn;
1643                         }
1644                 }
1645         }
1646         if (vnn == NULL) {
1647                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
1648                 return -1;
1649         }
1650
1651         killtcp = vnn->killtcp;
1652         
1653         /* If this is the first connection to kill we must allocate
1654            a new structure
1655          */
1656         if (killtcp == NULL) {
1657                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
1658                 CTDB_NO_MEMORY(ctdb, killtcp);
1659
1660                 killtcp->vnn         = vnn;
1661                 killtcp->ctdb        = ctdb;
1662                 killtcp->capture_fd  = -1;
1663                 killtcp->connections = trbt_create(killtcp, 0);
1664
1665                 vnn->killtcp         = killtcp;
1666                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
1667         }
1668
1669
1670
1671         /* create a structure that describes this connection we want to
1672            RST and store it in killtcp->connections
1673         */
1674         con = talloc(killtcp, struct ctdb_killtcp_con);
1675         CTDB_NO_MEMORY(ctdb, con);
1676         con->src_addr = src;
1677         con->dst_addr = dst;
1678         con->count    = 0;
1679         con->killtcp  = killtcp;
1680
1681
1682         trbt_insertarray32_callback(killtcp->connections,
1683                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
1684                         add_killtcp_callback, con);
1685
1686         /* 
1687            If we dont have a socket to listen on yet we must create it
1688          */
1689         if (killtcp->capture_fd == -1) {
1690                 killtcp->capture_fd = ctdb_sys_open_capture_socket(vnn->iface, &killtcp->private_data);
1691                 if (killtcp->capture_fd == -1) {
1692                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing socket for killtcp\n"));
1693                         goto failed;
1694                 }
1695         }
1696
1697
1698         if (killtcp->fde == NULL) {
1699                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
1700                                             EVENT_FD_READ | EVENT_FD_AUTOCLOSE, 
1701                                             capture_tcp_handler, killtcp);
1702
1703                 /* We also need to set up some events to tickle all these connections
1704                    until they are all reset
1705                 */
1706                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
1707                                 ctdb_tickle_sentenced_connections, killtcp);
1708         }
1709
1710         /* tickle him once now */
1711         ctdb_sys_send_tcp(
1712                 &con->dst_addr,
1713                 &con->src_addr,
1714                 0, 0, 0);
1715
1716         return 0;
1717
1718 failed:
1719         talloc_free(vnn->killtcp);
1720         vnn->killtcp = NULL;
1721         return -1;
1722 }
1723
1724 /*
1725   kill a TCP connection.
1726  */
1727 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
1728 {
1729         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
1730
1731         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
1732 }
1733
1734 /*
1735   called by a daemon to inform us of the entire list of TCP tickles for
1736   a particular public address.
1737   this control should only be sent by the node that is currently serving
1738   that public address.
1739  */
1740 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1741 {
1742         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
1743         struct ctdb_tcp_array *tcparray;
1744         struct ctdb_vnn *vnn;
1745
1746         /* We must at least have tickles.num or else we cant verify the size
1747            of the received data blob
1748          */
1749         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
1750                                         tickles.connections)) {
1751                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
1752                 return -1;
1753         }
1754
1755         /* verify that the size of data matches what we expect */
1756         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
1757                                 tickles.connections)
1758                          + sizeof(struct ctdb_tcp_connection)
1759                                  * list->tickles.num) {
1760                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
1761                 return -1;
1762         }       
1763
1764         vnn = find_public_ip_vnn(ctdb, &list->addr);
1765         if (vnn == NULL) {
1766                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
1767                         ctdb_addr_to_str(&list->addr)));
1768
1769                 return 1;
1770         }
1771
1772         /* remove any old ticklelist we might have */
1773         talloc_free(vnn->tcp_array);
1774         vnn->tcp_array = NULL;
1775
1776         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
1777         CTDB_NO_MEMORY(ctdb, tcparray);
1778
1779         tcparray->num = list->tickles.num;
1780
1781         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
1782         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1783
1784         memcpy(tcparray->connections, &list->tickles.connections[0], 
1785                sizeof(struct ctdb_tcp_connection)*tcparray->num);
1786
1787         /* We now have a new fresh tickle list array for this vnn */
1788         vnn->tcp_array = talloc_steal(vnn, tcparray);
1789         
1790         return 0;
1791 }
1792
1793 /*
1794   called to return the full list of tickles for the puclic address associated 
1795   with the provided vnn
1796  */
1797 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1798 {
1799         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1800         struct ctdb_control_tcp_tickle_list *list;
1801         struct ctdb_tcp_array *tcparray;
1802         int num;
1803         struct ctdb_vnn *vnn;
1804
1805         vnn = find_public_ip_vnn(ctdb, addr);
1806         if (vnn == NULL) {
1807                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
1808                         ctdb_addr_to_str(addr)));
1809
1810                 return 1;
1811         }
1812
1813         tcparray = vnn->tcp_array;
1814         if (tcparray) {
1815                 num = tcparray->num;
1816         } else {
1817                 num = 0;
1818         }
1819
1820         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
1821                                 tickles.connections)
1822                         + sizeof(struct ctdb_tcp_connection) * num;
1823
1824         outdata->dptr  = talloc_size(outdata, outdata->dsize);
1825         CTDB_NO_MEMORY(ctdb, outdata->dptr);
1826         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
1827
1828         list->addr = *addr;
1829         list->tickles.num = num;
1830         if (num) {
1831                 memcpy(&list->tickles.connections[0], tcparray->connections, 
1832                         sizeof(struct ctdb_tcp_connection) * num);
1833         }
1834
1835         return 0;
1836 }
1837
1838
1839 /*
1840   set the list of all tcp tickles for a public address
1841  */
1842 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
1843                               struct timeval timeout, uint32_t destnode, 
1844                               ctdb_sock_addr *addr,
1845                               struct ctdb_tcp_array *tcparray)
1846 {
1847         int ret, num;
1848         TDB_DATA data;
1849         struct ctdb_control_tcp_tickle_list *list;
1850
1851         if (tcparray) {
1852                 num = tcparray->num;
1853         } else {
1854                 num = 0;
1855         }
1856
1857         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
1858                                 tickles.connections) +
1859                         sizeof(struct ctdb_tcp_connection) * num;
1860         data.dptr = talloc_size(ctdb, data.dsize);
1861         CTDB_NO_MEMORY(ctdb, data.dptr);
1862
1863         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
1864         list->addr = *addr;
1865         list->tickles.num = num;
1866         if (tcparray) {
1867                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
1868         }
1869
1870         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1871                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1872                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1873         if (ret != 0) {
1874                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1875                 return -1;
1876         }
1877
1878         talloc_free(data.dptr);
1879
1880         return ret;
1881 }
1882
1883
1884 /*
1885   perform tickle updates if required
1886  */
1887 static void ctdb_update_tcp_tickles(struct event_context *ev, 
1888                                 struct timed_event *te, 
1889                                 struct timeval t, void *private_data)
1890 {
1891         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1892         int ret;
1893         struct ctdb_vnn *vnn;
1894
1895         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1896                 /* we only send out updates for public addresses that 
1897                    we have taken over
1898                  */
1899                 if (ctdb->pnn != vnn->pnn) {
1900                         continue;
1901                 }
1902                 /* We only send out the updates if we need to */
1903                 if (!vnn->tcp_update_needed) {
1904                         continue;
1905                 }
1906                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
1907                                 TAKEOVER_TIMEOUT(),
1908                                 CTDB_BROADCAST_CONNECTED,
1909                                 &vnn->public_address,
1910                                 vnn->tcp_array);
1911                 if (ret != 0) {
1912                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
1913                                 ctdb_addr_to_str(&vnn->public_address)));
1914                 }
1915         }
1916
1917         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1918                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
1919                              ctdb_update_tcp_tickles, ctdb);
1920 }               
1921         
1922
1923 /*
1924   start periodic update of tcp tickles
1925  */
1926 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
1927 {
1928         ctdb->tickle_update_context = talloc_new(ctdb);
1929
1930         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1931                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
1932                              ctdb_update_tcp_tickles, ctdb);
1933 }
1934
1935
1936
1937
1938 struct control_gratious_arp {
1939         struct ctdb_context *ctdb;
1940         ctdb_sock_addr addr;
1941         const char *iface;
1942         int count;
1943 };
1944
1945 /*
1946   send a control_gratuitous arp
1947  */
1948 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
1949                                   struct timeval t, void *private_data)
1950 {
1951         int ret;
1952         struct control_gratious_arp *arp = talloc_get_type(private_data, 
1953                                                         struct control_gratious_arp);
1954
1955         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
1956         if (ret != 0) {
1957                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp failed (%s)\n", strerror(errno)));
1958         }
1959
1960
1961         arp->count++;
1962         if (arp->count == CTDB_ARP_REPEAT) {
1963                 talloc_free(arp);
1964                 return;
1965         }
1966
1967         event_add_timed(arp->ctdb->ev, arp, 
1968                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
1969                         send_gratious_arp, arp);
1970 }
1971
1972
1973 /*
1974   send a gratious arp 
1975  */
1976 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
1977 {
1978         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
1979         struct control_gratious_arp *arp;
1980
1981         /* verify the size of indata */
1982         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
1983                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
1984                                  (unsigned)indata.dsize, 
1985                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
1986                 return -1;
1987         }
1988         if (indata.dsize != 
1989                 ( offsetof(struct ctdb_control_gratious_arp, iface)
1990                 + gratious_arp->len ) ){
1991
1992                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
1993                         "but should be %u bytes\n", 
1994                          (unsigned)indata.dsize, 
1995                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
1996                 return -1;
1997         }
1998
1999
2000         arp = talloc(ctdb, struct control_gratious_arp);
2001         CTDB_NO_MEMORY(ctdb, arp);
2002
2003         arp->ctdb  = ctdb;
2004         arp->addr   = gratious_arp->addr;
2005         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2006         CTDB_NO_MEMORY(ctdb, arp->iface);
2007         arp->count = 0;
2008         
2009         event_add_timed(arp->ctdb->ev, arp, 
2010                         timeval_zero(), send_gratious_arp, arp);
2011
2012         return 0;
2013 }
2014
2015 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2016 {
2017         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2018
2019
2020         /* verify the size of indata */
2021         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2022                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2023                 return -1;
2024         }
2025         if (indata.dsize != 
2026                 ( offsetof(struct ctdb_control_ip_iface, iface)
2027                 + pub->len ) ){
2028
2029                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2030                         "but should be %u bytes\n", 
2031                          (unsigned)indata.dsize, 
2032                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2033                 return -1;
2034         }
2035
2036         return ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2037 }
2038
/*
  completion callback for the releaseip event started by
  del_public_address.

  private_data is the temporary talloc context created for that event;
  it is released here. The script status is not inspected.
 */
static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
                                void *private_data)
{
        (void)ctdb;
        (void)status;

        talloc_free(private_data);
}
2047
2048 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2049 {
2050         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2051         struct ctdb_vnn *vnn;
2052         int ret;
2053
2054         /* verify the size of indata */
2055         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2056                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2057                 return -1;
2058         }
2059         if (indata.dsize != 
2060                 ( offsetof(struct ctdb_control_ip_iface, iface)
2061                 + pub->len ) ){
2062
2063                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2064                         "but should be %u bytes\n", 
2065                          (unsigned)indata.dsize, 
2066                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2067                 return -1;
2068         }
2069
2070         /* walk over all public addresses until we find a match */
2071         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2072                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2073                         TALLOC_CTX *mem_ctx = talloc_new(ctdb);
2074
2075                         DLIST_REMOVE(ctdb->vnn, vnn);
2076
2077                         ret = ctdb_event_script_callback(ctdb, 
2078                                          timeval_current_ofs(ctdb->tunable.script_timeout, 0),
2079                                          mem_ctx, delete_ip_callback, mem_ctx,
2080                                          "releaseip %s %s %u",
2081                                          vnn->iface, 
2082                                          talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
2083                                          vnn->public_netmask_bits);
2084                         talloc_free(vnn);
2085                         if (ret != 0) {
2086                                 return -1;
2087                         }
2088                         return 0;
2089                 }
2090         }
2091
2092         return -1;
2093 }
2094