add a context and a timed event so that once we have been in recovery
[metze/ctdb/wip.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/* Absolute deadline built from the takeover_timeout tunable.  The expansion
   refers to a variable named ctdb, which must be in scope at the call site. */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/* Offset handed to timeval_current_ofs() between two gratuitous ARP rounds
   (presumably seconds -- confirm against timeval_current_ofs). */
#define CTDB_ARP_INTERVAL 1
/* Number of ARP/tickle rounds sent for one takeover before giving up the
   timer. */
#define CTDB_ARP_REPEAT   3
35
36 struct ctdb_takeover_arp {
37         struct ctdb_context *ctdb;
38         uint32_t count;
39         ctdb_sock_addr addr;
40         struct ctdb_tcp_array *tcparray;
41         struct ctdb_vnn *vnn;
42 };
43
44
45 /*
46   lists of tcp endpoints
47  */
48 struct ctdb_tcp_list {
49         struct ctdb_tcp_list *prev, *next;
50         struct ctdb_tcp_connection connection;
51 };
52
53 /*
54   list of clients to kill on IP release
55  */
56 struct ctdb_client_ip {
57         struct ctdb_client_ip *prev, *next;
58         struct ctdb_context *ctdb;
59         ctdb_sock_addr addr;
60         uint32_t client_id;
61 };
62
63
64 /*
65   send a gratuitous arp
66  */
67 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
68                                   struct timeval t, void *private_data)
69 {
70         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
71                                                         struct ctdb_takeover_arp);
72         int i, ret;
73         struct ctdb_tcp_array *tcparray;
74
75         ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface);
76         if (ret != 0) {
77                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
78         }
79
80         tcparray = arp->tcparray;
81         if (tcparray) {
82                 for (i=0;i<tcparray->num;i++) {
83                         struct ctdb_tcp_connection *tcon;
84
85                         tcon = &tcparray->connections[i];
86                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
87                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
88                                 ctdb_addr_to_str(&tcon->src_addr),
89                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
90                         ret = ctdb_sys_send_tcp(
91                                 &tcon->src_addr, 
92                                 &tcon->dst_addr,
93                                 0, 0, 0);
94                         if (ret != 0) {
95                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
96                                         ctdb_addr_to_str(&tcon->src_addr)));
97                         }
98                 }
99         }
100
101         arp->count++;
102
103         if (arp->count == CTDB_ARP_REPEAT) {
104                 talloc_free(arp);
105                 return;
106         }
107
108         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
109                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
110                         ctdb_control_send_arp, arp);
111 }
112
113 struct takeover_callback_state {
114         struct ctdb_req_control *c;
115         ctdb_sock_addr *addr;
116         struct ctdb_vnn *vnn;
117 };
118
119 /*
120   called when takeip event finishes
121  */
122 static void takeover_ip_callback(struct ctdb_context *ctdb, int status, 
123                                  void *private_data)
124 {
125         struct takeover_callback_state *state = 
126                 talloc_get_type(private_data, struct takeover_callback_state);
127         struct ctdb_takeover_arp *arp;
128         struct ctdb_tcp_array *tcparray;
129
130         if (status != 0) {
131                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
132                         ctdb_addr_to_str(state->addr),
133                         state->vnn->iface));
134                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
135                 talloc_free(state);
136                 return;
137         }
138
139         if (!state->vnn->takeover_ctx) {
140                 state->vnn->takeover_ctx = talloc_new(ctdb);
141                 if (!state->vnn->takeover_ctx) {
142                         goto failed;
143                 }
144         }
145
146         arp = talloc_zero(state->vnn->takeover_ctx, struct ctdb_takeover_arp);
147         if (!arp) goto failed;
148         
149         arp->ctdb = ctdb;
150         arp->addr = *state->addr;
151         arp->vnn  = state->vnn;
152
153         tcparray = state->vnn->tcp_array;
154         if (tcparray) {
155                 /* add all of the known tcp connections for this IP to the
156                    list of tcp connections to send tickle acks for */
157                 arp->tcparray = talloc_steal(arp, tcparray);
158
159                 state->vnn->tcp_array = NULL;
160                 state->vnn->tcp_update_needed = true;
161         }
162
163         event_add_timed(arp->ctdb->ev, state->vnn->takeover_ctx, 
164                         timeval_zero(), ctdb_control_send_arp, arp);
165
166         /* the control succeeded */
167         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
168         talloc_free(state);
169         return;
170
171 failed:
172         ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
173         talloc_free(state);
174         return;
175 }
176
177 /*
178   Find the vnn of the node that has a public ip address
179   returns -1 if the address is not known as a public address
180  */
181 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
182 {
183         struct ctdb_vnn *vnn;
184
185         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
186                 if (ctdb_same_ip(&vnn->public_address, addr)) {
187                         return vnn;
188                 }
189         }
190
191         return NULL;
192 }
193
194
195 /*
196   take over an ip address
197  */
198 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, 
199                                  struct ctdb_req_control *c,
200                                  TDB_DATA indata, 
201                                  bool *async_reply)
202 {
203         int ret;
204         struct takeover_callback_state *state;
205         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
206         struct ctdb_vnn *vnn;
207
208         /* update out vnn list */
209         vnn = find_public_ip_vnn(ctdb, &pip->addr);
210         if (vnn == NULL) {
211                 DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n", 
212                         ctdb_addr_to_str(&pip->addr)));
213                 return 0;
214         }
215         vnn->pnn = pip->pnn;
216
217         /* if our kernel already has this IP, do nothing */
218         if (ctdb_sys_have_ip(&pip->addr)) {
219                 return 0;
220         }
221
222         state = talloc(ctdb, struct takeover_callback_state);
223         CTDB_NO_MEMORY(ctdb, state);
224
225         state->c = talloc_steal(ctdb, c);
226         state->addr = talloc(ctdb, ctdb_sock_addr);
227         CTDB_NO_MEMORY(ctdb, state->addr);
228
229         *state->addr = pip->addr;
230         state->vnn   = vnn;
231
232         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n", 
233                 ctdb_addr_to_str(&pip->addr),
234                 vnn->public_netmask_bits, 
235                 vnn->iface));
236
237         ret = ctdb_event_script_callback(ctdb, 
238                                          timeval_current_ofs(ctdb->tunable.script_timeout, 0),
239                                          state, takeover_ip_callback, state,
240                                          "takeip %s %s %u",
241                                          vnn->iface, 
242                                          talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
243                                          vnn->public_netmask_bits);
244
245         if (ret != 0) {
246                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
247                         ctdb_addr_to_str(&pip->addr),
248                         vnn->iface));
249                 talloc_free(state);
250                 return -1;
251         }
252
253         /* tell ctdb_control.c that we will be replying asynchronously */
254         *async_reply = true;
255
256         return 0;
257 }
258
259 /*
260   takeover an ip address old v4 style
261  */
262 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
263                                 struct ctdb_req_control *c,
264                                 TDB_DATA indata, 
265                                 bool *async_reply)
266 {
267         TDB_DATA data;
268         
269         data.dsize = sizeof(struct ctdb_public_ip);
270         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
271         CTDB_NO_MEMORY(ctdb, data.dptr);
272         
273         memcpy(data.dptr, indata.dptr, indata.dsize);
274         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
275 }
276
277 /*
278   kill any clients that are registered with a IP that is being released
279  */
280 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
281 {
282         struct ctdb_client_ip *ip;
283
284         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
285                 ctdb_addr_to_str(addr)));
286
287         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
288                 ctdb_sock_addr tmp_addr;
289
290                 tmp_addr = ip->addr;
291                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
292                         ip->client_id,
293                         ctdb_addr_to_str(&ip->addr)));
294
295                 if (ctdb_same_ip(&tmp_addr, addr)) {
296                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
297                                                                      ip->client_id, 
298                                                                      struct ctdb_client);
299                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
300                                 ip->client_id,
301                                 ctdb_addr_to_str(&ip->addr),
302                                 client->pid));
303
304                         if (client->pid != 0) {
305                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
306                                         (unsigned)client->pid,
307                                         ctdb_addr_to_str(addr),
308                                         ip->client_id));
309                                 kill(client->pid, SIGKILL);
310                         }
311                 }
312         }
313 }
314
315 /*
316   called when releaseip event finishes
317  */
318 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
319                                 void *private_data)
320 {
321         struct takeover_callback_state *state = 
322                 talloc_get_type(private_data, struct takeover_callback_state);
323         TDB_DATA data;
324
325         /* send a message to all clients of this node telling them
326            that the cluster has been reconfigured and they should
327            release any sockets on this IP */
328         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
329         data.dsize = strlen((char *)data.dptr)+1;
330
331         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
332
333         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
334
335         /* kill clients that have registered with this IP */
336         release_kill_clients(ctdb, state->addr);
337         
338         /* the control succeeded */
339         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
340         talloc_free(state);
341 }
342
343 /*
344   release an ip address
345  */
346 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
347                                 struct ctdb_req_control *c,
348                                 TDB_DATA indata, 
349                                 bool *async_reply)
350 {
351         int ret;
352         struct takeover_callback_state *state;
353         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
354         struct ctdb_vnn *vnn;
355
356         /* update our vnn list */
357         vnn = find_public_ip_vnn(ctdb, &pip->addr);
358         if (vnn == NULL) {
359                 DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n",
360                         ctdb_addr_to_str(&pip->addr)));
361                 return 0;
362         }
363         vnn->pnn = pip->pnn;
364
365         /* stop any previous arps */
366         talloc_free(vnn->takeover_ctx);
367         vnn->takeover_ctx = NULL;
368
369         if (!ctdb_sys_have_ip(&pip->addr)) {
370                 DEBUG(DEBUG_INFO,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
371                         ctdb_addr_to_str(&pip->addr),
372                         vnn->public_netmask_bits, 
373                         vnn->iface));
374                 return 0;
375         }
376
377         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s\n", 
378                 ctdb_addr_to_str(&pip->addr),
379                 vnn->public_netmask_bits, 
380                 vnn->iface));
381
382         state = talloc(ctdb, struct takeover_callback_state);
383         CTDB_NO_MEMORY(ctdb, state);
384
385         state->c = talloc_steal(state, c);
386         state->addr = talloc(state, ctdb_sock_addr);       
387         CTDB_NO_MEMORY(ctdb, state->addr);
388         *state->addr = pip->addr;
389         state->vnn   = vnn;
390
391         ret = ctdb_event_script_callback(ctdb, 
392                                          timeval_current_ofs(ctdb->tunable.script_timeout, 0),
393                                          state, release_ip_callback, state,
394                                          "releaseip %s %s %u",
395                                          vnn->iface, 
396                                          talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
397                                          vnn->public_netmask_bits);
398         if (ret != 0) {
399                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
400                         ctdb_addr_to_str(&pip->addr),
401                         vnn->iface));
402                 talloc_free(state);
403                 return -1;
404         }
405
406         /* tell the control that we will be reply asynchronously */
407         *async_reply = true;
408         return 0;
409 }
410
411 /*
412   release an ip address old v4 style
413  */
414 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
415                                 struct ctdb_req_control *c,
416                                 TDB_DATA indata, 
417                                 bool *async_reply)
418 {
419         TDB_DATA data;
420         
421         data.dsize = sizeof(struct ctdb_public_ip);
422         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
423         CTDB_NO_MEMORY(ctdb, data.dptr);
424         
425         memcpy(data.dptr, indata.dptr, indata.dsize);
426         return ctdb_control_release_ip(ctdb, c, data, async_reply);
427 }
428
429
430 static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface)
431 {
432         struct ctdb_vnn      *vnn;
433
434         /* Verify that we dont have an entry for this ip yet */
435         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
436                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
437                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
438                                 ctdb_addr_to_str(addr)));
439                         return -1;
440                 }               
441         }
442
443         /* create a new vnn structure for this ip address */
444         vnn = talloc_zero(ctdb, struct ctdb_vnn);
445         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
446         vnn->iface = talloc_strdup(vnn, iface);
447         vnn->public_address      = *addr;
448         vnn->public_netmask_bits = mask;
449         vnn->pnn                 = -1;
450         
451         DLIST_ADD(ctdb->vnn, vnn);
452
453         return 0;
454 }
455
456
457 /*
458   setup the event script directory
459 */
460 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
461 {
462         ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
463         CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
464         return 0;
465 }
466
467 /*
468   setup the public address lists from a file
469 */
470 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
471 {
472         char **lines;
473         int nlines;
474         int i;
475
476         lines = file_lines_load(alist, &nlines, ctdb);
477         if (lines == NULL) {
478                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
479                 return -1;
480         }
481         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
482                 nlines--;
483         }
484
485         for (i=0;i<nlines;i++) {
486                 unsigned mask;
487                 ctdb_sock_addr addr;
488                 const char *iface;
489                 char *tok, *line;
490
491                 line = lines[i];
492                 while ((*line == ' ') || (*line == '\t')) {
493                         line++;
494                 }
495                 if (*line == '#') {
496                         continue;
497                 }
498                 if (strcmp(line, "") == 0) {
499                         continue;
500                 }
501                 tok = strtok(line, " \t");
502                 if (!tok || !parse_ip_mask(tok, &addr, &mask)) {
503                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
504                         talloc_free(lines);
505                         return -1;
506                 }
507                 tok = strtok(NULL, " \t");
508                 if (tok == NULL) {
509                         if (NULL == ctdb->default_public_interface) {
510                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
511                                          i+1));
512                                 talloc_free(lines);
513                                 return -1;
514                         }
515                         iface = ctdb->default_public_interface;
516                 } else {
517                         iface = tok;
518                 }
519
520                 if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
521                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
522                         talloc_free(lines);
523                         return -1;
524                 }
525         }
526
527         talloc_free(lines);
528         return 0;
529 }
530
531
532
533
534 struct ctdb_public_ip_list {
535         struct ctdb_public_ip_list *next;
536         uint32_t pnn;
537         ctdb_sock_addr addr;
538 };
539
540
541 /* Given a physical node, return the number of
542    public addresses that is currently assigned to this node.
543 */
544 static int node_ip_coverage(struct ctdb_context *ctdb, 
545         int32_t pnn,
546         struct ctdb_public_ip_list *ips)
547 {
548         int num=0;
549
550         for (;ips;ips=ips->next) {
551                 if (ips->pnn == pnn) {
552                         num++;
553                 }
554         }
555         return num;
556 }
557
558
559 /* Check if this is a public ip known to the node, i.e. can that
560    node takeover this ip ?
561 */
562 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
563                 struct ctdb_public_ip_list *ip)
564 {
565         struct ctdb_all_public_ips *public_ips;
566         int i;
567
568         public_ips = ctdb->nodes[pnn]->public_ips;
569
570         if (public_ips == NULL) {
571                 return -1;
572         }
573
574         for (i=0;i<public_ips->num;i++) {
575                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
576                         /* yes, this node can serve this public ip */
577                         return 0;
578                 }
579         }
580
581         return -1;
582 }
583
584
585 /* search the node lists list for a node to takeover this ip.
586    pick the node that currently are serving the least number of ips
587    so that the ips get spread out evenly.
588 */
589 static int find_takeover_node(struct ctdb_context *ctdb, 
590                 struct ctdb_node_map *nodemap, uint32_t mask, 
591                 struct ctdb_public_ip_list *ip,
592                 struct ctdb_public_ip_list *all_ips)
593 {
594         int pnn, min=0, num;
595         int i;
596
597         pnn    = -1;
598         for (i=0;i<nodemap->num;i++) {
599                 if (nodemap->nodes[i].flags & mask) {
600                         /* This node is not healty and can not be used to serve
601                            a public address 
602                         */
603                         continue;
604                 }
605
606                 /* verify that this node can serve this ip */
607                 if (can_node_serve_ip(ctdb, i, ip)) {
608                         /* no it couldnt   so skip to the next node */
609                         continue;
610                 }
611
612                 num = node_ip_coverage(ctdb, i, all_ips);
613                 /* was this the first node we checked ? */
614                 if (pnn == -1) {
615                         pnn = i;
616                         min  = num;
617                 } else {
618                         if (num < min) {
619                                 pnn = i;
620                                 min  = num;
621                         }
622                 }
623         }       
624         if (pnn == -1) {
625                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
626                         ctdb_addr_to_str(&ip->addr)));
627
628                 return -1;
629         }
630
631         ip->pnn = pnn;
632         return 0;
633 }
634
635 struct ctdb_public_ip_list *
636 add_ip_to_merged_list(struct ctdb_context *ctdb,
637                         TALLOC_CTX *tmp_ctx, 
638                         struct ctdb_public_ip_list *ip_list, 
639                         struct ctdb_public_ip *ip)
640 {
641         struct ctdb_public_ip_list *tmp_ip; 
642
643         /* do we already have this ip in our merged list ?*/
644         for (tmp_ip=ip_list;tmp_ip;tmp_ip=tmp_ip->next) {
645
646                 /* we already have this public ip in the list */
647                 if (ctdb_same_ip(&tmp_ip->addr, &ip->addr)) {
648                         return ip_list;
649                 }
650         }
651
652         /* this is a new public ip, we must add it to the list */
653         tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
654         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
655         tmp_ip->pnn  = ip->pnn;
656         tmp_ip->addr = ip->addr;
657         tmp_ip->next = ip_list;
658
659         return tmp_ip;
660 }
661
662 struct ctdb_public_ip_list *
663 create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
664 {
665         int i, j;
666         struct ctdb_public_ip_list *ip_list = NULL;
667         struct ctdb_all_public_ips *public_ips;
668
669         for (i=0;i<ctdb->num_nodes;i++) {
670                 public_ips = ctdb->nodes[i]->public_ips;
671
672                 /* there were no public ips for this node */
673                 if (public_ips == NULL) {
674                         continue;
675                 }               
676
677                 for (j=0;j<public_ips->num;j++) {
678                         ip_list = add_ip_to_merged_list(ctdb, tmp_ctx,
679                                         ip_list, &public_ips->ips[j]);
680                 }
681         }
682
683         return ip_list;
684 }
685
686 /*
687   make any IP alias changes for public addresses that are necessary 
688  */
689 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
690 {
691         int i, num_healthy, retries;
692         struct ctdb_public_ip ip;
693         struct ctdb_public_ipv4 ipv4;
694         uint32_t mask;
695         struct ctdb_public_ip_list *all_ips, *tmp_ip;
696         int maxnode, maxnum=0, minnode, minnum=0, num;
697         TDB_DATA data;
698         struct timeval timeout;
699         struct client_async_data *async_data;
700         struct ctdb_client_control_state *state;
701         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
702
703
704         ZERO_STRUCT(ip);
705
706         /* Count how many completely healthy nodes we have */
707         num_healthy = 0;
708         for (i=0;i<nodemap->num;i++) {
709                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
710                         num_healthy++;
711                 }
712         }
713
714         if (num_healthy > 0) {
715                 /* We have healthy nodes, so only consider them for 
716                    serving public addresses
717                 */
718                 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
719         } else {
720                 /* We didnt have any completely healthy nodes so
721                    use "disabled" nodes as a fallback
722                 */
723                 mask = NODE_FLAGS_INACTIVE;
724         }
725
726         /* since nodes only know about those public addresses that
727            can be served by that particular node, no single node has
728            a full list of all public addresses that exist in the cluster.
729            Walk over all node structures and create a merged list of
730            all public addresses that exist in the cluster.
731         */
732         all_ips = create_merged_ip_list(ctdb, tmp_ctx);
733
734         /* If we want deterministic ip allocations, i.e. that the ip addresses
735            will always be allocated the same way for a specific set of
736            available/unavailable nodes.
737         */
738         if (1 == ctdb->tunable.deterministic_public_ips) {              
739                 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
740                 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
741                         tmp_ip->pnn = i%nodemap->num;
742                 }
743         }
744
745
746         /* mark all public addresses with a masked node as being served by
747            node -1
748         */
749         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
750                 if (tmp_ip->pnn == -1) {
751                         continue;
752                 }
753                 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
754                         tmp_ip->pnn = -1;
755                 }
756         }
757
758         /* verify that the assigned nodes can serve that public ip
759            and set it to -1 if not
760         */
761         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
762                 if (tmp_ip->pnn == -1) {
763                         continue;
764                 }
765                 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
766                         /* this node can not serve this ip. */
767                         tmp_ip->pnn = -1;
768                 }
769         }
770
771
772         /* now we must redistribute all public addresses with takeover node
773            -1 among the nodes available
774         */
775         retries = 0;
776 try_again:
777         /* loop over all ip's and find a physical node to cover for 
778            each unassigned ip.
779         */
780         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
781                 if (tmp_ip->pnn == -1) {
782                         if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
783                                 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
784                                         ctdb_addr_to_str(&tmp_ip->addr)));
785                         }
786                 }
787         }
788
789         /* If we dont want ips to fail back after a node becomes healthy
790            again, we wont even try to reallocat the ip addresses so that
791            they are evenly spread out.
792            This can NOT be used at the same time as DeterministicIPs !
793         */
794         if (1 == ctdb->tunable.no_ip_failback) {
795                 if (1 == ctdb->tunable.deterministic_public_ips) {
796                         DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
797                 }
798                 goto finished;
799         }
800
801
802         /* now, try to make sure the ip adresses are evenly distributed
803            across the node.
804            for each ip address, loop over all nodes that can serve this
805            ip and make sure that the difference between the node
806            serving the most and the node serving the least ip's are not greater
807            than 1.
808         */
809         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
810                 if (tmp_ip->pnn == -1) {
811                         continue;
812                 }
813
814                 /* Get the highest and lowest number of ips's served by any 
815                    valid node which can serve this ip.
816                 */
817                 maxnode = -1;
818                 minnode = -1;
819                 for (i=0;i<nodemap->num;i++) {
820                         if (nodemap->nodes[i].flags & mask) {
821                                 continue;
822                         }
823
824                         /* only check nodes that can actually serve this ip */
825                         if (can_node_serve_ip(ctdb, i, tmp_ip)) {
826                                 /* no it couldnt   so skip to the next node */
827                                 continue;
828                         }
829
830                         num = node_ip_coverage(ctdb, i, all_ips);
831                         if (maxnode == -1) {
832                                 maxnode = i;
833                                 maxnum  = num;
834                         } else {
835                                 if (num > maxnum) {
836                                         maxnode = i;
837                                         maxnum  = num;
838                                 }
839                         }
840                         if (minnode == -1) {
841                                 minnode = i;
842                                 minnum  = num;
843                         } else {
844                                 if (num < minnum) {
845                                         minnode = i;
846                                         minnum  = num;
847                                 }
848                         }
849                 }
850                 if (maxnode == -1) {
851                         DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
852                                 ctdb_addr_to_str(&tmp_ip->addr)));
853
854                         continue;
855                 }
856
857                 /* If we want deterministic IPs then dont try to reallocate 
858                    them to spread out the load.
859                 */
860                 if (1 == ctdb->tunable.deterministic_public_ips) {
861                         continue;
862                 }
863
864                 /* if the spread between the smallest and largest coverage by
865                    a node is >=2 we steal one of the ips from the node with
866                    most coverage to even things out a bit.
867                    try to do this at most 5 times  since we dont want to spend
868                    too much time balancing the ip coverage.
869                 */
870                 if ( (maxnum > minnum+1)
871                   && (retries < 5) ){
872                         struct ctdb_public_ip_list *tmp;
873
874                         /* mark one of maxnode's vnn's as unassigned and try
875                            again
876                         */
877                         for (tmp=all_ips;tmp;tmp=tmp->next) {
878                                 if (tmp->pnn == maxnode) {
879                                         tmp->pnn = -1;
880                                         retries++;
881                                         goto try_again;
882                                 }
883                         }
884                 }
885         }
886
887
888         /* finished distributing the public addresses, now just send the 
889            info out to the nodes
890         */
891 finished:
892
893         /* at this point ->pnn is the node which will own each IP
894            or -1 if there is no node that can cover this ip
895         */
896
897         /* now tell all nodes to delete any alias that they should not
898            have.  This will be a NOOP on nodes that don't currently
899            hold the given alias */
900         async_data = talloc_zero(tmp_ctx, struct client_async_data);
901         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
902
903         for (i=0;i<nodemap->num;i++) {
904                 /* don't talk to unconnected nodes, but do talk to banned nodes */
905                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
906                         continue;
907                 }
908
909                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
910                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
911                                 /* This node should be serving this
912                                    vnn so dont tell it to release the ip
913                                 */
914                                 continue;
915                         }
916                         if (tmp_ip->addr.sa.sa_family == AF_INET) {
917                                 ipv4.pnn = tmp_ip->pnn;
918                                 ipv4.sin = tmp_ip->addr.ip;
919
920                                 timeout = TAKEOVER_TIMEOUT();
921                                 data.dsize = sizeof(ipv4);
922                                 data.dptr  = (uint8_t *)&ipv4;
923                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
924                                                 0, CTDB_CONTROL_RELEASE_IPv4, 0,
925                                                 data, async_data,
926                                                 &timeout, NULL);
927                         } else {
928                                 ip.pnn  = tmp_ip->pnn;
929                                 ip.addr = tmp_ip->addr;
930
931                                 timeout = TAKEOVER_TIMEOUT();
932                                 data.dsize = sizeof(ip);
933                                 data.dptr  = (uint8_t *)&ip;
934                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
935                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
936                                                 data, async_data,
937                                                 &timeout, NULL);
938                         }
939
940                         if (state == NULL) {
941                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
942                                 talloc_free(tmp_ctx);
943                                 return -1;
944                         }
945                 
946                         ctdb_client_async_add(async_data, state);
947                 }
948         }
949         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
950                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
951                 talloc_free(tmp_ctx);
952                 return -1;
953         }
954         talloc_free(async_data);
955
956
957         /* tell all nodes to get their own IPs */
958         async_data = talloc_zero(tmp_ctx, struct client_async_data);
959         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
960         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
961                 if (tmp_ip->pnn == -1) {
962                         /* this IP won't be taken over */
963                         continue;
964                 }
965
966                 if (tmp_ip->addr.sa.sa_family == AF_INET) {
967                         ipv4.pnn = tmp_ip->pnn;
968                         ipv4.sin = tmp_ip->addr.ip;
969
970                         timeout = TAKEOVER_TIMEOUT();
971                         data.dsize = sizeof(ipv4);
972                         data.dptr  = (uint8_t *)&ipv4;
973                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
974                                         0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
975                                         data, async_data,
976                                         &timeout, NULL);
977                 } else {
978                         ip.pnn  = tmp_ip->pnn;
979                         ip.addr = tmp_ip->addr;
980
981                         timeout = TAKEOVER_TIMEOUT();
982                         data.dsize = sizeof(ip);
983                         data.dptr  = (uint8_t *)&ip;
984                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
985                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
986                                         data, async_data,
987                                         &timeout, NULL);
988                 }
989                 if (state == NULL) {
990                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
991                         talloc_free(tmp_ctx);
992                         return -1;
993                 }
994                 
995                 ctdb_client_async_add(async_data, state);
996         }
997         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
998                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
999                 talloc_free(tmp_ctx);
1000                 return -1;
1001         }
1002
1003         talloc_free(tmp_ctx);
1004         return 0;
1005 }
1006
1007
1008 /*
1009   destroy a ctdb_client_ip structure
1010  */
1011 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1012 {
1013         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1014                 ctdb_addr_to_str(&ip->addr),
1015                 ntohs(ip->addr.ip.sin_port),
1016                 ip->client_id));
1017
1018         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1019         return 0;
1020 }
1021
1022 /*
1023   called by a client to inform us of a TCP connection that it is managing
1024   that should tickled with an ACK when IP takeover is done
1025  */
1026 //qqq we need a new version of this control that takes ctdb_sock_addr
1027 //and have samba move to that instead.
1028 // This is IPV4 ONLY
1029 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1030                                 TDB_DATA indata)
1031 {
1032         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1033         struct ctdb_control_tcp *p = (struct ctdb_control_tcp *)indata.dptr;
1034         struct ctdb_tcp_list *tcp;
1035         struct ctdb_control_tcp_vnn t;
1036         int ret;
1037         TDB_DATA data;
1038         struct ctdb_client_ip *ip;
1039         struct ctdb_vnn *vnn;
1040         ctdb_sock_addr addr;
1041
1042         ZERO_STRUCT(addr);
1043         addr.ip = p->dest;
1044         vnn = find_public_ip_vnn(ctdb, &addr);
1045         if (vnn == NULL) {
1046                 if (ntohl(p->dest.sin_addr.s_addr) != INADDR_LOOPBACK) {
1047                         DEBUG(DEBUG_INFO,("Could not add client IP %s. This is not a public address.\n", 
1048                                 ctdb_addr_to_str((ctdb_sock_addr *)&p->dest)));
1049                 }
1050                 return 0;
1051         }
1052
1053         if (vnn->pnn != ctdb->pnn) {
1054                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1055                         ctdb_addr_to_str((ctdb_sock_addr *)&p->dest),
1056                         client_id, client->pid));
1057                 /* failing this call will tell smbd to die */
1058                 return -1;
1059         }
1060
1061         ip = talloc(client, struct ctdb_client_ip);
1062         CTDB_NO_MEMORY(ctdb, ip);
1063
1064         ip->ctdb      = ctdb;
1065         ip->addr.ip   = p->dest;
1066         ip->client_id = client_id;
1067         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1068         DLIST_ADD(ctdb->client_ip_list, ip);
1069
1070         tcp = talloc(client, struct ctdb_tcp_list);
1071         CTDB_NO_MEMORY(ctdb, tcp);
1072
1073         tcp->connection.src_addr.ip = p->src;
1074         tcp->connection.dst_addr.ip = p->dest;
1075
1076         DLIST_ADD(client->tcp_list, tcp);
1077
1078         t.src.ip  = p->src;
1079         t.dest.ip = p->dest;
1080
1081         data.dptr = (uint8_t *)&t;
1082         data.dsize = sizeof(t);
1083
1084         DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1085                 (unsigned)ntohs(p->dest.sin_port), 
1086                 ctdb_addr_to_str((ctdb_sock_addr *)&p->src),
1087                 (unsigned)ntohs(p->src.sin_port), client_id, client->pid));
1088
1089         /* tell all nodes about this tcp connection */
1090         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1091                                        CTDB_CONTROL_TCP_ADD,
1092                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1093         if (ret != 0) {
1094                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1095                 return -1;
1096         }
1097
1098         return 0;
1099 }
1100
1101 /*
1102   find a tcp address on a list
1103  */
1104 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1105                                            struct ctdb_tcp_connection *tcp)
1106 {
1107         int i;
1108
1109         if (array == NULL) {
1110                 return NULL;
1111         }
1112
1113         for (i=0;i<array->num;i++) {
1114                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1115                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1116                         return &array->connections[i];
1117                 }
1118         }
1119         return NULL;
1120 }
1121
1122 /*
1123   called by a daemon to inform us of a TCP connection that one of its
1124   clients managing that should tickled with an ACK when IP takeover is
1125   done
1126  */
1127 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1128 {
1129         struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1130         struct ctdb_tcp_array *tcparray;
1131         struct ctdb_tcp_connection tcp;
1132         struct ctdb_vnn *vnn;
1133
1134         vnn = find_public_ip_vnn(ctdb, &p->dest);
1135         if (vnn == NULL) {
1136                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1137                         ctdb_addr_to_str(&p->dest)));
1138
1139                 return -1;
1140         }
1141
1142
1143         tcparray = vnn->tcp_array;
1144
1145         /* If this is the first tickle */
1146         if (tcparray == NULL) {
1147                 tcparray = talloc_size(ctdb->nodes, 
1148                         offsetof(struct ctdb_tcp_array, connections) +
1149                         sizeof(struct ctdb_tcp_connection) * 1);
1150                 CTDB_NO_MEMORY(ctdb, tcparray);
1151                 vnn->tcp_array = tcparray;
1152
1153                 tcparray->num = 0;
1154                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1155                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1156
1157                 tcparray->connections[tcparray->num].src_addr = p->src;
1158                 tcparray->connections[tcparray->num].dst_addr = p->dest;
1159                 tcparray->num++;
1160                 return 0;
1161         }
1162
1163
1164         /* Do we already have this tickle ?*/
1165         tcp.src_addr = p->src;
1166         tcp.dst_addr = p->dest;
1167         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1168                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1169                         ctdb_addr_to_str(&tcp.dst_addr),
1170                         ntohs(tcp.dst_addr.ip.sin_port),
1171                         vnn->pnn));
1172                 return 0;
1173         }
1174
1175         /* A new tickle, we must add it to the array */
1176         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1177                                         struct ctdb_tcp_connection,
1178                                         tcparray->num+1);
1179         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1180
1181         vnn->tcp_array = tcparray;
1182         tcparray->connections[tcparray->num].src_addr = p->src;
1183         tcparray->connections[tcparray->num].dst_addr = p->dest;
1184         tcparray->num++;
1185                                 
1186         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1187                 ctdb_addr_to_str(&tcp.dst_addr),
1188                 ntohs(tcp.dst_addr.ip.sin_port),
1189                 vnn->pnn));
1190
1191         return 0;
1192 }
1193
1194
1195 /*
1196   called by a daemon to inform us of a TCP connection that one of its
1197   clients managing that should tickled with an ACK when IP takeover is
1198   done
1199  */
1200 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1201 {
1202         struct ctdb_tcp_connection *tcpp;
1203         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1204
1205         if (vnn == NULL) {
1206                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1207                         ctdb_addr_to_str(&conn->dst_addr)));
1208                 return;
1209         }
1210
1211         /* if the array is empty we cant remove it
1212            and we dont need to do anything
1213          */
1214         if (vnn->tcp_array == NULL) {
1215                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1216                         ctdb_addr_to_str(&conn->dst_addr),
1217                         ntohs(conn->dst_addr.ip.sin_port)));
1218                 return;
1219         }
1220
1221
1222         /* See if we know this connection
1223            if we dont know this connection  then we dont need to do anything
1224          */
1225         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1226         if (tcpp == NULL) {
1227                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1228                         ctdb_addr_to_str(&conn->dst_addr),
1229                         ntohs(conn->dst_addr.ip.sin_port)));
1230                 return;
1231         }
1232
1233
1234         /* We need to remove this entry from the array.
1235            Instead of allocating a new array and copying data to it
1236            we cheat and just copy the last entry in the existing array
1237            to the entry that is to be removed and just shring the 
1238            ->num field
1239          */
1240         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1241         vnn->tcp_array->num--;
1242
1243         /* If we deleted the last entry we also need to remove the entire array
1244          */
1245         if (vnn->tcp_array->num == 0) {
1246                 talloc_free(vnn->tcp_array);
1247                 vnn->tcp_array = NULL;
1248         }               
1249
1250         vnn->tcp_update_needed = true;
1251
1252         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1253                 ctdb_addr_to_str(&conn->src_addr),
1254                 ntohs(conn->src_addr.ip.sin_port)));
1255 }
1256
1257
/*
  called when a daemon restarts.

  Intended to send all tickles for the public addresses we are serving
  to the new node; that is not implemented yet, so this currently does
  nothing and reports success (0).
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
	/* XXX here we should send all tickles we are serving to the new node */
	(void)ctdb;
	(void)vnn;

	return 0;
}
1267
1268
1269 /*
1270   called when a client structure goes away - hook to remove
1271   elements from the tcp_list in all daemons
1272  */
1273 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1274 {
1275         while (client->tcp_list) {
1276                 struct ctdb_tcp_list *tcp = client->tcp_list;
1277                 DLIST_REMOVE(client->tcp_list, tcp);
1278                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1279         }
1280 }
1281
1282
1283 /*
1284   release all IPs on shutdown
1285  */
1286 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1287 {
1288         struct ctdb_vnn *vnn;
1289
1290         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1291                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1292                         continue;
1293                 }
1294                 if (vnn->pnn == ctdb->pnn) {
1295                         vnn->pnn = -1;
1296                 }
1297                 ctdb_event_script(ctdb, "releaseip %s %s %u",
1298                                   vnn->iface, 
1299                                   talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
1300                                   vnn->public_netmask_bits);
1301                 release_kill_clients(ctdb, &vnn->public_address);
1302         }
1303 }
1304
1305
1306 /*
1307   get list of public IPs
1308  */
1309 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1310                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1311 {
1312         int i, num, len;
1313         struct ctdb_all_public_ips *ips;
1314         struct ctdb_vnn *vnn;
1315
1316         /* count how many public ip structures we have */
1317         num = 0;
1318         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1319                 num++;
1320         }
1321
1322         len = offsetof(struct ctdb_all_public_ips, ips) + 
1323                 num*sizeof(struct ctdb_public_ip);
1324         ips = talloc_zero_size(outdata, len);
1325         CTDB_NO_MEMORY(ctdb, ips);
1326
1327         outdata->dsize = len;
1328         outdata->dptr  = (uint8_t *)ips;
1329
1330         ips->num = num;
1331         i = 0;
1332         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1333                 ips->ips[i].pnn  = vnn->pnn;
1334                 ips->ips[i].addr = vnn->public_address;
1335                 i++;
1336         }
1337
1338         return 0;
1339 }
1340
1341
1342 /*
1343   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
1344  */
1345 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
1346                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1347 {
1348         int i, num, len;
1349         struct ctdb_all_public_ipsv4 *ips;
1350         struct ctdb_vnn *vnn;
1351
1352         /* count how many public ip structures we have */
1353         num = 0;
1354         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1355                 if (vnn->public_address.sa.sa_family != AF_INET) {
1356                         continue;
1357                 }
1358                 num++;
1359         }
1360
1361         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
1362                 num*sizeof(struct ctdb_public_ipv4);
1363         ips = talloc_zero_size(outdata, len);
1364         CTDB_NO_MEMORY(ctdb, ips);
1365
1366         outdata->dsize = len;
1367         outdata->dptr  = (uint8_t *)ips;
1368
1369         ips->num = num;
1370         i = 0;
1371         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1372                 if (vnn->public_address.sa.sa_family != AF_INET) {
1373                         continue;
1374                 }
1375                 ips->ips[i].pnn = vnn->pnn;
1376                 ips->ips[i].sin = vnn->public_address.ip;
1377                 i++;
1378         }
1379
1380         return 0;
1381 }
1382
1383
1384 /* 
1385    structure containing the listening socket and the list of tcp connections
1386    that the ctdb daemon is to kill
1387 */
1388 struct ctdb_kill_tcp {
1389         struct ctdb_vnn *vnn;
1390         struct ctdb_context *ctdb;
1391         int capture_fd;
1392         struct fd_event *fde;
1393         trbt_tree_t *connections;
1394         void *private_data;
1395 };
1396
1397 /*
1398   a tcp connection that is to be killed
1399  */
1400 struct ctdb_killtcp_con {
1401         ctdb_sock_addr src_addr;
1402         ctdb_sock_addr dst_addr;
1403         int count;
1404         struct ctdb_kill_tcp *killtcp;
1405 };
1406
1407 /* this function is used to create a key to represent this socketpair
1408    in the killtcp tree.
1409    this key is used to insert and lookup matching socketpairs that are
1410    to be tickled and RST
1411 */
1412 #define KILLTCP_KEYLEN  10
1413 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
1414 {
1415         static uint32_t key[KILLTCP_KEYLEN];
1416
1417         bzero(key, sizeof(key));
1418
1419         if (src->sa.sa_family != dst->sa.sa_family) {
1420                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
1421                 return key;
1422         }
1423         
1424         switch (src->sa.sa_family) {
1425         case AF_INET:
1426                 key[0]  = dst->ip.sin_addr.s_addr;
1427                 key[1]  = src->ip.sin_addr.s_addr;
1428                 key[2]  = dst->ip.sin_port;
1429                 key[3]  = src->ip.sin_port;
1430                 break;
1431         case AF_INET6:
1432                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
1433                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
1434                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
1435                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
1436                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
1437                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
1438                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
1439                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
1440                 key[8]  = dst->ip6.sin6_port;
1441                 key[9]  = src->ip6.sin6_port;
1442                 break;
1443         default:
1444                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
1445                 return key;
1446         }
1447
1448         return key;
1449 }
1450
1451 /*
1452   called when we get a read event on the raw socket
1453  */
1454 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
1455                                 uint16_t flags, void *private_data)
1456 {
1457         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1458         struct ctdb_killtcp_con *con;
1459         ctdb_sock_addr src, dst;
1460         uint32_t ack_seq, seq;
1461
1462         if (!(flags & EVENT_FD_READ)) {
1463                 return;
1464         }
1465
1466         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
1467                                 killtcp->private_data,
1468                                 &src, &dst,
1469                                 &ack_seq, &seq) != 0) {
1470                 /* probably a non-tcp ACK packet */
1471                 return;
1472         }
1473
1474         /* check if we have this guy in our list of connections
1475            to kill
1476         */
1477         con = trbt_lookuparray32(killtcp->connections, 
1478                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
1479         if (con == NULL) {
1480                 /* no this was some other packet we can just ignore */
1481                 return;
1482         }
1483
1484         /* This one has been tickled !
1485            now reset him and remove him from the list.
1486          */
1487         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
1488                 ntohs(con->dst_addr.ip.sin_port),
1489                 ctdb_addr_to_str(&con->src_addr),
1490                 ntohs(con->src_addr.ip.sin_port)));
1491
1492         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
1493         talloc_free(con);
1494 }
1495
1496
1497 /* when traversing the list of all tcp connections to send tickle acks to
1498    (so that we can capture the ack coming back and kill the connection
1499     by a RST)
1500    this callback is called for each connection we are currently trying to kill
1501 */
1502 static void tickle_connection_traverse(void *param, void *data)
1503 {
1504         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
1505
1506         /* have tried too many times, just give up */
1507         if (con->count >= 5) {
1508                 talloc_free(con);
1509                 return;
1510         }
1511
1512         /* othervise, try tickling it again */
1513         con->count++;
1514         ctdb_sys_send_tcp(
1515                 (ctdb_sock_addr *)&con->dst_addr,
1516                 (ctdb_sock_addr *)&con->src_addr,
1517                 0, 0, 0);
1518 }
1519
1520
1521 /* 
1522    called every second until all sentenced connections have been reset
1523  */
1524 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
1525                                               struct timeval t, void *private_data)
1526 {
1527         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1528
1529
1530         /* loop over all connections sending tickle ACKs */
1531         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
1532
1533
1534         /* If there are no more connections to kill we can remove the
1535            entire killtcp structure
1536          */
1537         if ( (killtcp->connections == NULL) || 
1538              (killtcp->connections->root == NULL) ) {
1539                 talloc_free(killtcp);
1540                 return;
1541         }
1542
1543         /* try tickling them again in a seconds time
1544          */
1545         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
1546                         ctdb_tickle_sentenced_connections, killtcp);
1547 }
1548
1549 /*
1550   destroy the killtcp structure
1551  */
1552 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
1553 {
1554         killtcp->vnn->killtcp = NULL;
1555         return 0;
1556 }
1557
1558
/* insert callback for the killtcp tree: unconditionally choose the new
   connection structure (parm) over any existing one (data).

   the old structure is deliberately not freed here; that one is
   talloc_stolen by the same node in the tree anyway and will be deleted
   when the new data is deleted
*/
static void *add_killtcp_callback(void *parm, void *data)
{
	(void)data;	/* the previous entry, if any, is simply replaced */

	return parm;
}
1570
1571 /*
1572   add a tcp socket to the list of connections we want to RST
1573  */
1574 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
1575                                        ctdb_sock_addr *s,
1576                                        ctdb_sock_addr *d)
1577 {
1578         ctdb_sock_addr src, dst;
1579         struct ctdb_kill_tcp *killtcp;
1580         struct ctdb_killtcp_con *con;
1581         struct ctdb_vnn *vnn;
1582
1583         ctdb_canonicalize_ip(s, &src);
1584         ctdb_canonicalize_ip(d, &dst);
1585
1586         vnn = find_public_ip_vnn(ctdb, &dst);
1587         if (vnn == NULL) {
1588                 vnn = find_public_ip_vnn(ctdb, &src);
1589         }
1590         if (vnn == NULL) {
1591                 /* if it is not a public ip   it could be our 'single ip' */
1592                 if (ctdb->single_ip_vnn) {
1593                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
1594                                 vnn = ctdb->single_ip_vnn;
1595                         }
1596                 }
1597         }
1598         if (vnn == NULL) {
1599                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
1600                 return -1;
1601         }
1602
1603         killtcp = vnn->killtcp;
1604         
1605         /* If this is the first connection to kill we must allocate
1606            a new structure
1607          */
1608         if (killtcp == NULL) {
1609                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
1610                 CTDB_NO_MEMORY(ctdb, killtcp);
1611
1612                 killtcp->vnn         = vnn;
1613                 killtcp->ctdb        = ctdb;
1614                 killtcp->capture_fd  = -1;
1615                 killtcp->connections = trbt_create(killtcp, 0);
1616
1617                 vnn->killtcp         = killtcp;
1618                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
1619         }
1620
1621
1622
1623         /* create a structure that describes this connection we want to
1624            RST and store it in killtcp->connections
1625         */
1626         con = talloc(killtcp, struct ctdb_killtcp_con);
1627         CTDB_NO_MEMORY(ctdb, con);
1628         con->src_addr = src;
1629         con->dst_addr = dst;
1630         con->count    = 0;
1631         con->killtcp  = killtcp;
1632
1633
1634         trbt_insertarray32_callback(killtcp->connections,
1635                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
1636                         add_killtcp_callback, con);
1637
1638         /* 
1639            If we dont have a socket to listen on yet we must create it
1640          */
1641         if (killtcp->capture_fd == -1) {
1642                 killtcp->capture_fd = ctdb_sys_open_capture_socket(vnn->iface, &killtcp->private_data);
1643                 if (killtcp->capture_fd == -1) {
1644                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing socket for killtcp\n"));
1645                         goto failed;
1646                 }
1647         }
1648
1649
1650         if (killtcp->fde == NULL) {
1651                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
1652                                             EVENT_FD_READ | EVENT_FD_AUTOCLOSE, 
1653                                             capture_tcp_handler, killtcp);
1654
1655                 /* We also need to set up some events to tickle all these connections
1656                    until they are all reset
1657                 */
1658                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
1659                                 ctdb_tickle_sentenced_connections, killtcp);
1660         }
1661
1662         /* tickle him once now */
1663         ctdb_sys_send_tcp(
1664                 &con->dst_addr,
1665                 &con->src_addr,
1666                 0, 0, 0);
1667
1668         return 0;
1669
1670 failed:
1671         talloc_free(vnn->killtcp);
1672         vnn->killtcp = NULL;
1673         return -1;
1674 }
1675
1676 /*
1677   kill a TCP connection.
1678  */
1679 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
1680 {
1681         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
1682
1683         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
1684 }
1685
1686 /*
1687   called by a daemon to inform us of the entire list of TCP tickles for
1688   a particular public address.
1689   this control should only be sent by the node that is currently serving
1690   that public address.
1691  */
1692 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1693 {
1694         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
1695         struct ctdb_tcp_array *tcparray;
1696         struct ctdb_vnn *vnn;
1697
1698         /* We must at least have tickles.num or else we cant verify the size
1699            of the received data blob
1700          */
1701         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
1702                                         tickles.connections)) {
1703                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
1704                 return -1;
1705         }
1706
1707         /* verify that the size of data matches what we expect */
1708         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
1709                                 tickles.connections)
1710                          + sizeof(struct ctdb_tcp_connection)
1711                                  * list->tickles.num) {
1712                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
1713                 return -1;
1714         }       
1715
1716         vnn = find_public_ip_vnn(ctdb, &list->addr);
1717         if (vnn == NULL) {
1718                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
1719                         ctdb_addr_to_str(&list->addr)));
1720
1721                 return 1;
1722         }
1723
1724         /* remove any old ticklelist we might have */
1725         talloc_free(vnn->tcp_array);
1726         vnn->tcp_array = NULL;
1727
1728         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
1729         CTDB_NO_MEMORY(ctdb, tcparray);
1730
1731         tcparray->num = list->tickles.num;
1732
1733         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
1734         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1735
1736         memcpy(tcparray->connections, &list->tickles.connections[0], 
1737                sizeof(struct ctdb_tcp_connection)*tcparray->num);
1738
1739         /* We now have a new fresh tickle list array for this vnn */
1740         vnn->tcp_array = talloc_steal(vnn, tcparray);
1741         
1742         return 0;
1743 }
1744
1745 /*
1746   called to return the full list of tickles for the puclic address associated 
1747   with the provided vnn
1748  */
1749 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1750 {
1751         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1752         struct ctdb_control_tcp_tickle_list *list;
1753         struct ctdb_tcp_array *tcparray;
1754         int num;
1755         struct ctdb_vnn *vnn;
1756
1757         vnn = find_public_ip_vnn(ctdb, addr);
1758         if (vnn == NULL) {
1759                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
1760                         ctdb_addr_to_str(addr)));
1761
1762                 return 1;
1763         }
1764
1765         tcparray = vnn->tcp_array;
1766         if (tcparray) {
1767                 num = tcparray->num;
1768         } else {
1769                 num = 0;
1770         }
1771
1772         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
1773                                 tickles.connections)
1774                         + sizeof(struct ctdb_tcp_connection) * num;
1775
1776         outdata->dptr  = talloc_size(outdata, outdata->dsize);
1777         CTDB_NO_MEMORY(ctdb, outdata->dptr);
1778         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
1779
1780         list->addr = *addr;
1781         list->tickles.num = num;
1782         if (num) {
1783                 memcpy(&list->tickles.connections[0], tcparray->connections, 
1784                         sizeof(struct ctdb_tcp_connection) * num);
1785         }
1786
1787         return 0;
1788 }
1789
1790
1791 /*
1792   set the list of all tcp tickles for a public address
1793  */
1794 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
1795                               struct timeval timeout, uint32_t destnode, 
1796                               ctdb_sock_addr *addr,
1797                               struct ctdb_tcp_array *tcparray)
1798 {
1799         int ret, num;
1800         TDB_DATA data;
1801         struct ctdb_control_tcp_tickle_list *list;
1802
1803         if (tcparray) {
1804                 num = tcparray->num;
1805         } else {
1806                 num = 0;
1807         }
1808
1809         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
1810                                 tickles.connections) +
1811                         sizeof(struct ctdb_tcp_connection) * num;
1812         data.dptr = talloc_size(ctdb, data.dsize);
1813         CTDB_NO_MEMORY(ctdb, data.dptr);
1814
1815         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
1816         list->addr = *addr;
1817         list->tickles.num = num;
1818         if (tcparray) {
1819                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
1820         }
1821
1822         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1823                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1824                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1825         if (ret != 0) {
1826                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1827                 return -1;
1828         }
1829
1830         talloc_free(data.dptr);
1831
1832         return ret;
1833 }
1834
1835
1836 /*
1837   perform tickle updates if required
1838  */
1839 static void ctdb_update_tcp_tickles(struct event_context *ev, 
1840                                 struct timed_event *te, 
1841                                 struct timeval t, void *private_data)
1842 {
1843         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1844         int ret;
1845         struct ctdb_vnn *vnn;
1846
1847         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1848                 /* we only send out updates for public addresses that 
1849                    we have taken over
1850                  */
1851                 if (ctdb->pnn != vnn->pnn) {
1852                         continue;
1853                 }
1854                 /* We only send out the updates if we need to */
1855                 if (!vnn->tcp_update_needed) {
1856                         continue;
1857                 }
1858                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
1859                                 TAKEOVER_TIMEOUT(),
1860                                 CTDB_BROADCAST_CONNECTED,
1861                                 &vnn->public_address,
1862                                 vnn->tcp_array);
1863                 if (ret != 0) {
1864                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
1865                                 ctdb_addr_to_str(&vnn->public_address)));
1866                 }
1867         }
1868
1869         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1870                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
1871                              ctdb_update_tcp_tickles, ctdb);
1872 }               
1873         
1874
1875 /*
1876   start periodic update of tcp tickles
1877  */
1878 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
1879 {
1880         ctdb->tickle_update_context = talloc_new(ctdb);
1881
1882         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1883                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
1884                              ctdb_update_tcp_tickles, ctdb);
1885 }
1886
1887
1888
1889
/*
  state of one gratuitous arp request started through
  ctdb_control_send_gratious_arp(); handed to send_gratious_arp() as the
  private data of its timed event and freed there once enough arps
  have been sent.
 */
struct control_gratious_arp {
        /* daemon context, gives access to the event context for re-arming */
        struct ctdb_context *ctdb;
        /* address that is passed to ctdb_sys_send_arp() */
        ctdb_sock_addr addr;
        /* name of the interface to send on, a talloc copy owned by this struct */
        const char *iface;
        /* number of arps sent so far, compared against CTDB_ARP_REPEAT */
        int count;
};
1896
1897 /*
1898   send a control_gratuitous arp
1899  */
1900 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
1901                                   struct timeval t, void *private_data)
1902 {
1903         int ret;
1904         struct control_gratious_arp *arp = talloc_get_type(private_data, 
1905                                                         struct control_gratious_arp);
1906
1907         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
1908         if (ret != 0) {
1909                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp failed (%s)\n", strerror(errno)));
1910         }
1911
1912
1913         arp->count++;
1914         if (arp->count == CTDB_ARP_REPEAT) {
1915                 talloc_free(arp);
1916                 return;
1917         }
1918
1919         event_add_timed(arp->ctdb->ev, arp, 
1920                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
1921                         send_gratious_arp, arp);
1922 }
1923
1924
1925 /*
1926   send a gratious arp 
1927  */
1928 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
1929 {
1930         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
1931         struct control_gratious_arp *arp;
1932
1933         /* verify the size of indata */
1934         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
1935                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
1936                                  (unsigned)indata.dsize, 
1937                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
1938                 return -1;
1939         }
1940         if (indata.dsize != 
1941                 ( offsetof(struct ctdb_control_gratious_arp, iface)
1942                 + gratious_arp->len ) ){
1943
1944                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
1945                         "but should be %u bytes\n", 
1946                          (unsigned)indata.dsize, 
1947                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
1948                 return -1;
1949         }
1950
1951
1952         arp = talloc(ctdb, struct control_gratious_arp);
1953         CTDB_NO_MEMORY(ctdb, arp);
1954
1955         arp->ctdb  = ctdb;
1956         arp->addr   = gratious_arp->addr;
1957         arp->iface = talloc_strdup(arp, gratious_arp->iface);
1958         CTDB_NO_MEMORY(ctdb, arp->iface);
1959         arp->count = 0;
1960         
1961         event_add_timed(arp->ctdb->ev, arp, 
1962                         timeval_zero(), send_gratious_arp, arp);
1963
1964         return 0;
1965 }
1966
1967 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
1968 {
1969         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
1970
1971
1972         /* verify the size of indata */
1973         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
1974                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
1975                 return -1;
1976         }
1977         if (indata.dsize != 
1978                 ( offsetof(struct ctdb_control_ip_iface, iface)
1979                 + pub->len ) ){
1980
1981                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
1982                         "but should be %u bytes\n", 
1983                          (unsigned)indata.dsize, 
1984                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
1985                 return -1;
1986         }
1987
1988         return ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
1989 }
1990
1991 /*
1992   called when releaseip event finishes for del_public_address
1993  */
1994 static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
1995                                 void *private_data)
1996 {
1997         talloc_free(private_data);
1998 }
1999
2000 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2001 {
2002         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2003         struct ctdb_vnn *vnn;
2004         int ret;
2005
2006         /* verify the size of indata */
2007         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2008                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2009                 return -1;
2010         }
2011         if (indata.dsize != 
2012                 ( offsetof(struct ctdb_control_ip_iface, iface)
2013                 + pub->len ) ){
2014
2015                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2016                         "but should be %u bytes\n", 
2017                          (unsigned)indata.dsize, 
2018                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2019                 return -1;
2020         }
2021
2022         /* walk over all public addresses until we find a match */
2023         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2024                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2025                         TALLOC_CTX *mem_ctx = talloc_new(ctdb);
2026
2027                         DLIST_REMOVE(ctdb->vnn, vnn);
2028
2029                         ret = ctdb_event_script_callback(ctdb, 
2030                                          timeval_current_ofs(ctdb->tunable.script_timeout, 0),
2031                                          mem_ctx, delete_ip_callback, mem_ctx,
2032                                          "releaseip %s %s %u",
2033                                          vnn->iface, 
2034                                          talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
2035                                          vnn->public_netmask_bits);
2036                         talloc_free(vnn);
2037                         if (ret != 0) {
2038                                 return -1;
2039                         }
2040                         return 0;
2041                 }
2042         }
2043
2044         return -1;
2045 }
2046