server: keep the interface information in a list of ctdb_iface structures
[sahlberg/ctdb.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/* expands to timeval_current_ofs() of the takeover_timeout tunable;
   needs a variable called 'ctdb' in scope where it is used */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/* first argument to timeval_current_ofs() when ctdb_control_send_arp
   re-arms itself */
#define CTDB_ARP_INTERVAL 1
/* ctdb_control_send_arp stops re-arming once it has run this many times */
#define CTDB_ARP_REPEAT 3
35
/*
  one local network interface that public addresses can be hosted on,
  kept in the list headed by ctdb->ifaces
 */
struct ctdb_iface {
        /* list linkage, maintained through DLIST_ADD() */
        struct ctdb_iface *prev;
        struct ctdb_iface *next;
        /* interface name, compared with strcmp() on lookup */
        const char *name;
        /* interfaces with link_up == false are skipped when an interface
           is picked for a public address */
        bool link_up;
        /* number of public addresses currently assigned to this interface */
        uint32_t references;
};
42
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
44 {
45         if (vnn->iface) {
46                 return vnn->iface->name;
47         }
48
49         return "__none__";
50 }
51
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
53 {
54         struct ctdb_iface *i;
55
56         /* Verify that we dont have an entry for this ip yet */
57         for (i=ctdb->ifaces;i;i=i->next) {
58                 if (strcmp(i->name, iface) == 0) {
59                         return 0;
60                 }
61         }
62
63         /* create a new structure for this interface */
64         i = talloc_zero(ctdb, struct ctdb_iface);
65         CTDB_NO_MEMORY_FATAL(ctdb, i);
66         i->name = talloc_strdup(i, iface);
67         CTDB_NO_MEMORY(ctdb, i->name);
68         i->link_up = true;
69
70         DLIST_ADD(ctdb->ifaces, i);
71
72         return 0;
73 }
74
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
76                                           const char *iface)
77 {
78         struct ctdb_iface *i;
79
80         /* Verify that we dont have an entry for this ip yet */
81         for (i=ctdb->ifaces;i;i=i->next) {
82                 if (strcmp(i->name, iface) == 0) {
83                         return i;
84                 }
85         }
86
87         return NULL;
88 }
89
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
91                                               struct ctdb_vnn *vnn)
92 {
93         int i;
94         struct ctdb_iface *cur = NULL;
95         struct ctdb_iface *best = NULL;
96
97         for (i=0; vnn->ifaces[i]; i++) {
98
99                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
100                 if (cur == NULL) {
101                         continue;
102                 }
103
104                 if (!cur->link_up) {
105                         continue;
106                 }
107
108                 if (best == NULL) {
109                         best = cur;
110                         continue;
111                 }
112
113                 if (cur->references < best->references) {
114                         best = cur;
115                         continue;
116                 }
117         }
118
119         return best;
120 }
121
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123                                      struct ctdb_vnn *vnn)
124 {
125         struct ctdb_iface *best = NULL;
126
127         if (vnn->iface) {
128                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129                                    "still assigned to iface '%s'\n",
130                                    ctdb_addr_to_str(&vnn->public_address),
131                                    ctdb_vnn_iface_string(vnn)));
132                 return 0;
133         }
134
135         best = ctdb_vnn_best_iface(ctdb, vnn);
136         if (best == NULL) {
137                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138                                   "cannot assign to iface any iface\n",
139                                   ctdb_addr_to_str(&vnn->public_address)));
140                 return -1;
141         }
142
143         vnn->iface = best;
144         best->references++;
145         vnn->pnn = ctdb->pnn;
146
147         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148                            "now assigned to iface '%s' refs[%d]\n",
149                            ctdb_addr_to_str(&vnn->public_address),
150                            ctdb_vnn_iface_string(vnn),
151                            best->references));
152         return 0;
153 }
154
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156                                     struct ctdb_vnn *vnn)
157 {
158         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159                            "now unassigned (old iface '%s' refs[%d])\n",
160                            ctdb_addr_to_str(&vnn->public_address),
161                            ctdb_vnn_iface_string(vnn),
162                            vnn->iface?vnn->iface->references:0));
163         if (vnn->iface) {
164                 vnn->iface->references--;
165         }
166         vnn->iface = NULL;
167         if (vnn->pnn == ctdb->pnn) {
168                 vnn->pnn = -1;
169         }
170 }
171
172 struct ctdb_takeover_arp {
173         struct ctdb_context *ctdb;
174         uint32_t count;
175         ctdb_sock_addr addr;
176         struct ctdb_tcp_array *tcparray;
177         struct ctdb_vnn *vnn;
178 };
179
180
181 /*
182   lists of tcp endpoints
183  */
184 struct ctdb_tcp_list {
185         struct ctdb_tcp_list *prev, *next;
186         struct ctdb_tcp_connection connection;
187 };
188
189 /*
190   list of clients to kill on IP release
191  */
192 struct ctdb_client_ip {
193         struct ctdb_client_ip *prev, *next;
194         struct ctdb_context *ctdb;
195         ctdb_sock_addr addr;
196         uint32_t client_id;
197 };
198
199
200 /*
201   send a gratuitous arp
202  */
203 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
204                                   struct timeval t, void *private_data)
205 {
206         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
207                                                         struct ctdb_takeover_arp);
208         int i, ret;
209         struct ctdb_tcp_array *tcparray;
210         const char *iface = ctdb_vnn_iface_string(arp->vnn);
211
212         ret = ctdb_sys_send_arp(&arp->addr, iface);
213         if (ret != 0) {
214                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
215                                   iface, strerror(errno)));
216         }
217
218         tcparray = arp->tcparray;
219         if (tcparray) {
220                 for (i=0;i<tcparray->num;i++) {
221                         struct ctdb_tcp_connection *tcon;
222
223                         tcon = &tcparray->connections[i];
224                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
225                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
226                                 ctdb_addr_to_str(&tcon->src_addr),
227                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
228                         ret = ctdb_sys_send_tcp(
229                                 &tcon->src_addr, 
230                                 &tcon->dst_addr,
231                                 0, 0, 0);
232                         if (ret != 0) {
233                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
234                                         ctdb_addr_to_str(&tcon->src_addr)));
235                         }
236                 }
237         }
238
239         arp->count++;
240
241         if (arp->count == CTDB_ARP_REPEAT) {
242                 talloc_free(arp);
243                 return;
244         }
245
246         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
247                         timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
248                         ctdb_control_send_arp, arp);
249 }
250
251 struct takeover_callback_state {
252         struct ctdb_req_control *c;
253         ctdb_sock_addr *addr;
254         struct ctdb_vnn *vnn;
255 };
256
257 /*
258   called when takeip event finishes
259  */
260 static void takeover_ip_callback(struct ctdb_context *ctdb, int status, 
261                                  void *private_data)
262 {
263         struct takeover_callback_state *state = 
264                 talloc_get_type(private_data, struct takeover_callback_state);
265         struct ctdb_takeover_arp *arp;
266         struct ctdb_tcp_array *tcparray;
267
268         if (status != 0) {
269                 if (status == -ETIME) {
270                         ctdb_ban_self(ctdb);
271                 }
272                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
273                         ctdb_addr_to_str(state->addr),
274                         ctdb_vnn_iface_string(state->vnn)));
275                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
276                 talloc_free(state);
277                 return;
278         }
279
280         if (!state->vnn->takeover_ctx) {
281                 state->vnn->takeover_ctx = talloc_new(state->vnn);
282                 if (!state->vnn->takeover_ctx) {
283                         goto failed;
284                 }
285         }
286
287         arp = talloc_zero(state->vnn->takeover_ctx, struct ctdb_takeover_arp);
288         if (!arp) goto failed;
289         
290         arp->ctdb = ctdb;
291         arp->addr = *state->addr;
292         arp->vnn  = state->vnn;
293
294         tcparray = state->vnn->tcp_array;
295         if (tcparray) {
296                 /* add all of the known tcp connections for this IP to the
297                    list of tcp connections to send tickle acks for */
298                 arp->tcparray = talloc_steal(arp, tcparray);
299
300                 state->vnn->tcp_array = NULL;
301                 state->vnn->tcp_update_needed = true;
302         }
303
304         event_add_timed(arp->ctdb->ev, state->vnn->takeover_ctx, 
305                         timeval_zero(), ctdb_control_send_arp, arp);
306
307         /* the control succeeded */
308         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
309         talloc_free(state);
310         return;
311
312 failed:
313         ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
314         talloc_free(state);
315         return;
316 }
317
318 /*
319   Find the vnn of the node that has a public ip address
320   returns -1 if the address is not known as a public address
321  */
322 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
323 {
324         struct ctdb_vnn *vnn;
325
326         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
327                 if (ctdb_same_ip(&vnn->public_address, addr)) {
328                         return vnn;
329                 }
330         }
331
332         return NULL;
333 }
334
335 /*
336   take over an ip address
337  */
338 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, 
339                                  struct ctdb_req_control *c,
340                                  TDB_DATA indata, 
341                                  bool *async_reply)
342 {
343         int ret;
344         struct takeover_callback_state *state;
345         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
346         struct ctdb_vnn *vnn;
347
348         /* update out vnn list */
349         vnn = find_public_ip_vnn(ctdb, &pip->addr);
350         if (vnn == NULL) {
351                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n", 
352                         ctdb_addr_to_str(&pip->addr)));
353                 return 0;
354         }
355         vnn->pnn = pip->pnn;
356
357         /* if our kernel already has this IP, do nothing */
358         if (ctdb_sys_have_ip(&pip->addr)) {
359                 return 0;
360         }
361
362         ret = ctdb_vnn_assign_iface(ctdb, vnn);
363         if (ret != 0) {
364                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
365                                  "assin a usable interface\n",
366                                  ctdb_addr_to_str(&pip->addr),
367                                  vnn->public_netmask_bits));
368                 return -1;
369         }
370
371         state = talloc(vnn, struct takeover_callback_state);
372         CTDB_NO_MEMORY(ctdb, state);
373
374         state->c = talloc_steal(ctdb, c);
375         state->addr = talloc(ctdb, ctdb_sock_addr);
376         CTDB_NO_MEMORY(ctdb, state->addr);
377
378         *state->addr = pip->addr;
379         state->vnn   = vnn;
380
381         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n", 
382                 ctdb_addr_to_str(&pip->addr),
383                 vnn->public_netmask_bits, 
384                 ctdb_vnn_iface_string(vnn)));
385
386         ret = ctdb_event_script_callback(ctdb, 
387                                          state, takeover_ip_callback, state,
388                                          false,
389                                          CTDB_EVENT_TAKE_IP,
390                                          "%s %s %u",
391                                          ctdb_vnn_iface_string(vnn),
392                                          ctdb_addr_to_str(&pip->addr),
393                                          vnn->public_netmask_bits);
394
395         if (ret != 0) {
396                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
397                         ctdb_addr_to_str(&pip->addr),
398                         ctdb_vnn_iface_string(vnn)));
399                 talloc_free(state);
400                 return -1;
401         }
402
403         /* tell ctdb_control.c that we will be replying asynchronously */
404         *async_reply = true;
405
406         return 0;
407 }
408
409 /*
410   takeover an ip address old v4 style
411  */
412 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
413                                 struct ctdb_req_control *c,
414                                 TDB_DATA indata, 
415                                 bool *async_reply)
416 {
417         TDB_DATA data;
418         
419         data.dsize = sizeof(struct ctdb_public_ip);
420         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
421         CTDB_NO_MEMORY(ctdb, data.dptr);
422         
423         memcpy(data.dptr, indata.dptr, indata.dsize);
424         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
425 }
426
427 /*
428   kill any clients that are registered with a IP that is being released
429  */
430 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
431 {
432         struct ctdb_client_ip *ip;
433
434         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
435                 ctdb_addr_to_str(addr)));
436
437         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
438                 ctdb_sock_addr tmp_addr;
439
440                 tmp_addr = ip->addr;
441                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
442                         ip->client_id,
443                         ctdb_addr_to_str(&ip->addr)));
444
445                 if (ctdb_same_ip(&tmp_addr, addr)) {
446                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
447                                                                      ip->client_id, 
448                                                                      struct ctdb_client);
449                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
450                                 ip->client_id,
451                                 ctdb_addr_to_str(&ip->addr),
452                                 client->pid));
453
454                         if (client->pid != 0) {
455                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
456                                         (unsigned)client->pid,
457                                         ctdb_addr_to_str(addr),
458                                         ip->client_id));
459                                 kill(client->pid, SIGKILL);
460                         }
461                 }
462         }
463 }
464
465 /*
466   called when releaseip event finishes
467  */
468 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
469                                 void *private_data)
470 {
471         struct takeover_callback_state *state = 
472                 talloc_get_type(private_data, struct takeover_callback_state);
473         TDB_DATA data;
474
475         if (status == -ETIME) {
476                 ctdb_ban_self(ctdb);
477         }
478
479         /* send a message to all clients of this node telling them
480            that the cluster has been reconfigured and they should
481            release any sockets on this IP */
482         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
483         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
484         data.dsize = strlen((char *)data.dptr)+1;
485
486         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
487
488         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
489
490         /* kill clients that have registered with this IP */
491         release_kill_clients(ctdb, state->addr);
492
493         ctdb_vnn_unassign_iface(ctdb, state->vnn);
494
495         /* the control succeeded */
496         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
497         talloc_free(state);
498 }
499
500 /*
501   release an ip address
502  */
503 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
504                                 struct ctdb_req_control *c,
505                                 TDB_DATA indata, 
506                                 bool *async_reply)
507 {
508         int ret;
509         struct takeover_callback_state *state;
510         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
511         struct ctdb_vnn *vnn;
512
513         /* update our vnn list */
514         vnn = find_public_ip_vnn(ctdb, &pip->addr);
515         if (vnn == NULL) {
516                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
517                         ctdb_addr_to_str(&pip->addr)));
518                 return 0;
519         }
520         vnn->pnn = pip->pnn;
521
522         /* stop any previous arps */
523         talloc_free(vnn->takeover_ctx);
524         vnn->takeover_ctx = NULL;
525
526         if (!ctdb_sys_have_ip(&pip->addr)) {
527                 DEBUG(DEBUG_NOTICE,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
528                         ctdb_addr_to_str(&pip->addr),
529                         vnn->public_netmask_bits, 
530                         ctdb_vnn_iface_string(vnn)));
531                 ctdb_vnn_unassign_iface(ctdb, vnn);
532                 return 0;
533         }
534
535         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%u\n", 
536                 ctdb_addr_to_str(&pip->addr),
537                 vnn->public_netmask_bits, 
538                 ctdb_vnn_iface_string(vnn),
539                 pip->pnn));
540
541         state = talloc(ctdb, struct takeover_callback_state);
542         CTDB_NO_MEMORY(ctdb, state);
543
544         state->c = talloc_steal(state, c);
545         state->addr = talloc(state, ctdb_sock_addr);       
546         CTDB_NO_MEMORY(ctdb, state->addr);
547         *state->addr = pip->addr;
548         state->vnn   = vnn;
549
550         ret = ctdb_event_script_callback(ctdb, 
551                                          state, release_ip_callback, state,
552                                          false,
553                                          CTDB_EVENT_RELEASE_IP,
554                                          "%s %s %u",
555                                          ctdb_vnn_iface_string(vnn),
556                                          ctdb_addr_to_str(&pip->addr),
557                                          vnn->public_netmask_bits);
558         if (ret != 0) {
559                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
560                         ctdb_addr_to_str(&pip->addr),
561                         ctdb_vnn_iface_string(vnn)));
562                 talloc_free(state);
563                 return -1;
564         }
565
566         /* tell the control that we will be reply asynchronously */
567         *async_reply = true;
568         return 0;
569 }
570
571 /*
572   release an ip address old v4 style
573  */
574 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
575                                 struct ctdb_req_control *c,
576                                 TDB_DATA indata, 
577                                 bool *async_reply)
578 {
579         TDB_DATA data;
580         
581         data.dsize = sizeof(struct ctdb_public_ip);
582         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
583         CTDB_NO_MEMORY(ctdb, data.dptr);
584         
585         memcpy(data.dptr, indata.dptr, indata.dsize);
586         return ctdb_control_release_ip(ctdb, c, data, async_reply);
587 }
588
589
590 static int ctdb_add_public_address(struct ctdb_context *ctdb,
591                                    ctdb_sock_addr *addr,
592                                    unsigned mask, const char *ifaces)
593 {
594         struct ctdb_vnn      *vnn;
595         uint32_t num = 0;
596         char *tmp;
597         const char *iface;
598         int i;
599         int ret;
600
601         /* Verify that we dont have an entry for this ip yet */
602         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
603                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
604                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
605                                 ctdb_addr_to_str(addr)));
606                         return -1;
607                 }               
608         }
609
610         /* create a new vnn structure for this ip address */
611         vnn = talloc_zero(ctdb, struct ctdb_vnn);
612         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
613         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
614         tmp = talloc_strdup(vnn, ifaces);
615         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
616         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
617                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
618                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
619                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
620                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
621                 num++;
622         }
623         talloc_free(tmp);
624         vnn->ifaces[num] = NULL;
625         vnn->public_address      = *addr;
626         vnn->public_netmask_bits = mask;
627         vnn->pnn                 = -1;
628
629         for (i=0; vnn->ifaces[i]; i++) {
630                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
631                 if (ret != 0) {
632                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
633                                            "for public_address[%s]\n",
634                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
635                         talloc_free(vnn);
636                         return -1;
637                 }
638         }
639
640         DLIST_ADD(ctdb->vnn, vnn);
641
642         return 0;
643 }
644
645 /*
646   setup the event script directory
647 */
648 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
649 {
650         ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
651         CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
652         return 0;
653 }
654
655 /*
656   setup the public address lists from a file
657 */
658 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
659 {
660         char **lines;
661         int nlines;
662         int i;
663
664         lines = file_lines_load(alist, &nlines, ctdb);
665         if (lines == NULL) {
666                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
667                 return -1;
668         }
669         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
670                 nlines--;
671         }
672
673         for (i=0;i<nlines;i++) {
674                 unsigned mask;
675                 ctdb_sock_addr addr;
676                 const char *addrstr;
677                 const char *ifaces;
678                 char *tok, *line;
679
680                 line = lines[i];
681                 while ((*line == ' ') || (*line == '\t')) {
682                         line++;
683                 }
684                 if (*line == '#') {
685                         continue;
686                 }
687                 if (strcmp(line, "") == 0) {
688                         continue;
689                 }
690                 tok = strtok(line, " \t");
691                 addrstr = tok;
692                 tok = strtok(NULL, " \t");
693                 if (tok == NULL) {
694                         if (NULL == ctdb->default_public_interface) {
695                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
696                                          i+1));
697                                 talloc_free(lines);
698                                 return -1;
699                         }
700                         ifaces = ctdb->default_public_interface;
701                 } else {
702                         ifaces = tok;
703                 }
704
705                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
706                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
707                         talloc_free(lines);
708                         return -1;
709                 }
710                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
711                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
712                         talloc_free(lines);
713                         return -1;
714                 }
715         }
716
717         talloc_free(lines);
718         return 0;
719 }
720
721 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
722                               const char *iface,
723                               const char *ip)
724 {
725         struct ctdb_vnn *svnn;
726         bool ok;
727         int ret;
728
729         svnn = talloc_zero(ctdb, struct ctdb_vnn);
730         CTDB_NO_MEMORY(ctdb, svnn);
731
732         svnn->ifaces = talloc_array(svnn, const char *, 2);
733         CTDB_NO_MEMORY(ctdb, svnn->ifaces);
734         svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
735         CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
736         svnn->ifaces[1] = NULL;
737
738         ok = parse_ip(ip, iface, 0, &svnn->public_address);
739         if (!ok) {
740                 talloc_free(svnn);
741                 return -1;
742         }
743
744         ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
745         if (ret != 0) {
746                 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
747                                    "for single_ip[%s]\n",
748                                    svnn->ifaces[0],
749                                    ctdb_addr_to_str(&svnn->public_address)));
750                 talloc_free(svnn);
751                 return -1;
752         }
753
754         ret = ctdb_vnn_assign_iface(ctdb, svnn);
755         if (ret != 0) {
756                 talloc_free(svnn);
757                 return -1;
758         }
759
760         ctdb->single_ip_vnn = svnn;
761         return 0;
762 }
763
764 struct ctdb_public_ip_list {
765         struct ctdb_public_ip_list *next;
766         uint32_t pnn;
767         ctdb_sock_addr addr;
768 };
769
770
771 /* Given a physical node, return the number of
772    public addresses that is currently assigned to this node.
773 */
774 static int node_ip_coverage(struct ctdb_context *ctdb, 
775         int32_t pnn,
776         struct ctdb_public_ip_list *ips)
777 {
778         int num=0;
779
780         for (;ips;ips=ips->next) {
781                 if (ips->pnn == pnn) {
782                         num++;
783                 }
784         }
785         return num;
786 }
787
788
789 /* Check if this is a public ip known to the node, i.e. can that
790    node takeover this ip ?
791 */
792 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
793                 struct ctdb_public_ip_list *ip)
794 {
795         struct ctdb_all_public_ips *public_ips;
796         int i;
797
798         public_ips = ctdb->nodes[pnn]->public_ips;
799
800         if (public_ips == NULL) {
801                 return -1;
802         }
803
804         for (i=0;i<public_ips->num;i++) {
805                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
806                         /* yes, this node can serve this public ip */
807                         return 0;
808                 }
809         }
810
811         return -1;
812 }
813
814
815 /* search the node lists list for a node to takeover this ip.
816    pick the node that currently are serving the least number of ips
817    so that the ips get spread out evenly.
818 */
819 static int find_takeover_node(struct ctdb_context *ctdb, 
820                 struct ctdb_node_map *nodemap, uint32_t mask, 
821                 struct ctdb_public_ip_list *ip,
822                 struct ctdb_public_ip_list *all_ips)
823 {
824         int pnn, min=0, num;
825         int i;
826
827         pnn    = -1;
828         for (i=0;i<nodemap->num;i++) {
829                 if (nodemap->nodes[i].flags & mask) {
830                         /* This node is not healty and can not be used to serve
831                            a public address 
832                         */
833                         continue;
834                 }
835
836                 /* verify that this node can serve this ip */
837                 if (can_node_serve_ip(ctdb, i, ip)) {
838                         /* no it couldnt   so skip to the next node */
839                         continue;
840                 }
841
842                 num = node_ip_coverage(ctdb, i, all_ips);
843                 /* was this the first node we checked ? */
844                 if (pnn == -1) {
845                         pnn = i;
846                         min  = num;
847                 } else {
848                         if (num < min) {
849                                 pnn = i;
850                                 min  = num;
851                         }
852                 }
853         }       
854         if (pnn == -1) {
855                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
856                         ctdb_addr_to_str(&ip->addr)));
857
858                 return -1;
859         }
860
861         ip->pnn = pnn;
862         return 0;
863 }
864
865 #define IP_KEYLEN       4
866 static uint32_t *ip_key(ctdb_sock_addr *ip)
867 {
868         static uint32_t key[IP_KEYLEN];
869
870         bzero(key, sizeof(key));
871
872         switch (ip->sa.sa_family) {
873         case AF_INET:
874                 key[3]  = htonl(ip->ip.sin_addr.s_addr);
875                 break;
876         case AF_INET6:
877                 key[0]  = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
878                 key[1]  = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
879                 key[2]  = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
880                 key[3]  = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
881                 break;
882         default:
883                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
884                 return key;
885         }
886
887         return key;
888 }
889
/* Callback given to trbt_insertarray32_callback() when the merged ip
   list is built.  It hands back 'parm' unchanged and ignores 'data'.
*/
static void *add_ip_callback(void *parm, void *data)
{
        (void)data;

        return parm;
}
894
895 void getips_count_callback(void *param, void *data)
896 {
897         struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
898         struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
899
900         new_ip->next = *ip_list;
901         *ip_list     = new_ip;
902 }
903
904 struct ctdb_public_ip_list *
905 create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
906 {
907         int i, j;
908         struct ctdb_public_ip_list *ip_list;
909         struct ctdb_all_public_ips *public_ips;
910         trbt_tree_t *ip_tree;
911
912         ip_tree = trbt_create(tmp_ctx, 0);
913
914         for (i=0;i<ctdb->num_nodes;i++) {
915                 public_ips = ctdb->nodes[i]->public_ips;
916
917                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
918                         continue;
919                 }
920
921                 /* there were no public ips for this node */
922                 if (public_ips == NULL) {
923                         continue;
924                 }               
925
926                 for (j=0;j<public_ips->num;j++) {
927                         struct ctdb_public_ip_list *tmp_ip; 
928
929                         tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
930                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
931                         tmp_ip->pnn  = public_ips->ips[j].pnn;
932                         tmp_ip->addr = public_ips->ips[j].addr;
933                         tmp_ip->next = NULL;
934
935                         trbt_insertarray32_callback(ip_tree,
936                                 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
937                                 add_ip_callback,
938                                 tmp_ip);
939                 }
940         }
941
942         ip_list = NULL;
943         trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
944
945         return ip_list;
946 }
947
948 /*
949   make any IP alias changes for public addresses that are necessary 
950  */
951 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
952 {
953         int i, num_healthy, retries;
954         struct ctdb_public_ip ip;
955         struct ctdb_public_ipv4 ipv4;
956         uint32_t mask;
957         struct ctdb_public_ip_list *all_ips, *tmp_ip;
958         int maxnode, maxnum=0, minnode, minnum=0, num;
959         TDB_DATA data;
960         struct timeval timeout;
961         struct client_async_data *async_data;
962         struct ctdb_client_control_state *state;
963         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
964
965
966         ZERO_STRUCT(ip);
967
968         /* Count how many completely healthy nodes we have */
969         num_healthy = 0;
970         for (i=0;i<nodemap->num;i++) {
971                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
972                         num_healthy++;
973                 }
974         }
975
976         if (num_healthy > 0) {
977                 /* We have healthy nodes, so only consider them for 
978                    serving public addresses
979                 */
980                 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
981         } else {
982                 /* We didnt have any completely healthy nodes so
983                    use "disabled" nodes as a fallback
984                 */
985                 mask = NODE_FLAGS_INACTIVE;
986         }
987
988         /* since nodes only know about those public addresses that
989            can be served by that particular node, no single node has
990            a full list of all public addresses that exist in the cluster.
991            Walk over all node structures and create a merged list of
992            all public addresses that exist in the cluster.
993         */
994         all_ips = create_merged_ip_list(ctdb, tmp_ctx);
995
996         /* If we want deterministic ip allocations, i.e. that the ip addresses
997            will always be allocated the same way for a specific set of
998            available/unavailable nodes.
999         */
1000         if (1 == ctdb->tunable.deterministic_public_ips) {              
1001                 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1002                 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1003                         tmp_ip->pnn = i%nodemap->num;
1004                 }
1005         }
1006
1007
1008         /* mark all public addresses with a masked node as being served by
1009            node -1
1010         */
1011         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1012                 if (tmp_ip->pnn == -1) {
1013                         continue;
1014                 }
1015                 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1016                         tmp_ip->pnn = -1;
1017                 }
1018         }
1019
1020         /* verify that the assigned nodes can serve that public ip
1021            and set it to -1 if not
1022         */
1023         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1024                 if (tmp_ip->pnn == -1) {
1025                         continue;
1026                 }
1027                 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1028                         /* this node can not serve this ip. */
1029                         tmp_ip->pnn = -1;
1030                 }
1031         }
1032
1033
1034         /* now we must redistribute all public addresses with takeover node
1035            -1 among the nodes available
1036         */
1037         retries = 0;
1038 try_again:
1039         /* loop over all ip's and find a physical node to cover for 
1040            each unassigned ip.
1041         */
1042         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1043                 if (tmp_ip->pnn == -1) {
1044                         if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1045                                 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1046                                         ctdb_addr_to_str(&tmp_ip->addr)));
1047                         }
1048                 }
1049         }
1050
1051         /* If we dont want ips to fail back after a node becomes healthy
1052            again, we wont even try to reallocat the ip addresses so that
1053            they are evenly spread out.
1054            This can NOT be used at the same time as DeterministicIPs !
1055         */
1056         if (1 == ctdb->tunable.no_ip_failback) {
1057                 if (1 == ctdb->tunable.deterministic_public_ips) {
1058                         DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1059                 }
1060                 goto finished;
1061         }
1062
1063
1064         /* now, try to make sure the ip adresses are evenly distributed
1065            across the node.
1066            for each ip address, loop over all nodes that can serve this
1067            ip and make sure that the difference between the node
1068            serving the most and the node serving the least ip's are not greater
1069            than 1.
1070         */
1071         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1072                 if (tmp_ip->pnn == -1) {
1073                         continue;
1074                 }
1075
1076                 /* Get the highest and lowest number of ips's served by any 
1077                    valid node which can serve this ip.
1078                 */
1079                 maxnode = -1;
1080                 minnode = -1;
1081                 for (i=0;i<nodemap->num;i++) {
1082                         if (nodemap->nodes[i].flags & mask) {
1083                                 continue;
1084                         }
1085
1086                         /* only check nodes that can actually serve this ip */
1087                         if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1088                                 /* no it couldnt   so skip to the next node */
1089                                 continue;
1090                         }
1091
1092                         num = node_ip_coverage(ctdb, i, all_ips);
1093                         if (maxnode == -1) {
1094                                 maxnode = i;
1095                                 maxnum  = num;
1096                         } else {
1097                                 if (num > maxnum) {
1098                                         maxnode = i;
1099                                         maxnum  = num;
1100                                 }
1101                         }
1102                         if (minnode == -1) {
1103                                 minnode = i;
1104                                 minnum  = num;
1105                         } else {
1106                                 if (num < minnum) {
1107                                         minnode = i;
1108                                         minnum  = num;
1109                                 }
1110                         }
1111                 }
1112                 if (maxnode == -1) {
1113                         DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1114                                 ctdb_addr_to_str(&tmp_ip->addr)));
1115
1116                         continue;
1117                 }
1118
1119                 /* If we want deterministic IPs then dont try to reallocate 
1120                    them to spread out the load.
1121                 */
1122                 if (1 == ctdb->tunable.deterministic_public_ips) {
1123                         continue;
1124                 }
1125
1126                 /* if the spread between the smallest and largest coverage by
1127                    a node is >=2 we steal one of the ips from the node with
1128                    most coverage to even things out a bit.
1129                    try to do this at most 5 times  since we dont want to spend
1130                    too much time balancing the ip coverage.
1131                 */
1132                 if ( (maxnum > minnum+1)
1133                   && (retries < 5) ){
1134                         struct ctdb_public_ip_list *tmp;
1135
1136                         /* mark one of maxnode's vnn's as unassigned and try
1137                            again
1138                         */
1139                         for (tmp=all_ips;tmp;tmp=tmp->next) {
1140                                 if (tmp->pnn == maxnode) {
1141                                         tmp->pnn = -1;
1142                                         retries++;
1143                                         goto try_again;
1144                                 }
1145                         }
1146                 }
1147         }
1148
1149
1150         /* finished distributing the public addresses, now just send the 
1151            info out to the nodes
1152         */
1153 finished:
1154
1155         /* at this point ->pnn is the node which will own each IP
1156            or -1 if there is no node that can cover this ip
1157         */
1158
1159         /* now tell all nodes to delete any alias that they should not
1160            have.  This will be a NOOP on nodes that don't currently
1161            hold the given alias */
1162         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1163         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1164
1165         for (i=0;i<nodemap->num;i++) {
1166                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1167                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1168                         continue;
1169                 }
1170
1171                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1172                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1173                                 /* This node should be serving this
1174                                    vnn so dont tell it to release the ip
1175                                 */
1176                                 continue;
1177                         }
1178                         if (tmp_ip->addr.sa.sa_family == AF_INET) {
1179                                 ipv4.pnn = tmp_ip->pnn;
1180                                 ipv4.sin = tmp_ip->addr.ip;
1181
1182                                 timeout = TAKEOVER_TIMEOUT();
1183                                 data.dsize = sizeof(ipv4);
1184                                 data.dptr  = (uint8_t *)&ipv4;
1185                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1186                                                 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1187                                                 data, async_data,
1188                                                 &timeout, NULL);
1189                         } else {
1190                                 ip.pnn  = tmp_ip->pnn;
1191                                 ip.addr = tmp_ip->addr;
1192
1193                                 timeout = TAKEOVER_TIMEOUT();
1194                                 data.dsize = sizeof(ip);
1195                                 data.dptr  = (uint8_t *)&ip;
1196                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1197                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
1198                                                 data, async_data,
1199                                                 &timeout, NULL);
1200                         }
1201
1202                         if (state == NULL) {
1203                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1204                                 talloc_free(tmp_ctx);
1205                                 return -1;
1206                         }
1207                 
1208                         ctdb_client_async_add(async_data, state);
1209                 }
1210         }
1211         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1212                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1213                 talloc_free(tmp_ctx);
1214                 return -1;
1215         }
1216         talloc_free(async_data);
1217
1218
1219         /* tell all nodes to get their own IPs */
1220         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1221         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1222         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1223                 if (tmp_ip->pnn == -1) {
1224                         /* this IP won't be taken over */
1225                         continue;
1226                 }
1227
1228                 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1229                         ipv4.pnn = tmp_ip->pnn;
1230                         ipv4.sin = tmp_ip->addr.ip;
1231
1232                         timeout = TAKEOVER_TIMEOUT();
1233                         data.dsize = sizeof(ipv4);
1234                         data.dptr  = (uint8_t *)&ipv4;
1235                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1236                                         0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1237                                         data, async_data,
1238                                         &timeout, NULL);
1239                 } else {
1240                         ip.pnn  = tmp_ip->pnn;
1241                         ip.addr = tmp_ip->addr;
1242
1243                         timeout = TAKEOVER_TIMEOUT();
1244                         data.dsize = sizeof(ip);
1245                         data.dptr  = (uint8_t *)&ip;
1246                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1247                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
1248                                         data, async_data,
1249                                         &timeout, NULL);
1250                 }
1251                 if (state == NULL) {
1252                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1253                         talloc_free(tmp_ctx);
1254                         return -1;
1255                 }
1256                 
1257                 ctdb_client_async_add(async_data, state);
1258         }
1259         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1260                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1261                 talloc_free(tmp_ctx);
1262                 return -1;
1263         }
1264
1265         talloc_free(tmp_ctx);
1266         return 0;
1267 }
1268
1269
1270 /*
1271   destroy a ctdb_client_ip structure
1272  */
1273 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1274 {
1275         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1276                 ctdb_addr_to_str(&ip->addr),
1277                 ntohs(ip->addr.ip.sin_port),
1278                 ip->client_id));
1279
1280         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1281         return 0;
1282 }
1283
1284 /*
1285   called by a client to inform us of a TCP connection that it is managing
1286   that should tickled with an ACK when IP takeover is done
1287   we handle both the old ipv4 style of packets as well as the new ipv4/6
1288   pdus.
1289  */
1290 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1291                                 TDB_DATA indata)
1292 {
1293         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1294         struct ctdb_control_tcp *old_addr = NULL;
1295         struct ctdb_control_tcp_addr new_addr;
1296         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1297         struct ctdb_tcp_list *tcp;
1298         struct ctdb_control_tcp_vnn t;
1299         int ret;
1300         TDB_DATA data;
1301         struct ctdb_client_ip *ip;
1302         struct ctdb_vnn *vnn;
1303         ctdb_sock_addr addr;
1304
1305         switch (indata.dsize) {
1306         case sizeof(struct ctdb_control_tcp):
1307                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1308                 ZERO_STRUCT(new_addr);
1309                 tcp_sock = &new_addr;
1310                 tcp_sock->src.ip  = old_addr->src;
1311                 tcp_sock->dest.ip = old_addr->dest;
1312                 break;
1313         case sizeof(struct ctdb_control_tcp_addr):
1314                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1315                 break;
1316         default:
1317                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1318                                  "to ctdb_control_tcp_client. size was %d but "
1319                                  "only allowed sizes are %lu and %lu\n",
1320                                  (int)indata.dsize,
1321                                  (long unsigned)sizeof(struct ctdb_control_tcp),
1322                                  (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1323                 return -1;
1324         }
1325
1326         addr = tcp_sock->src;
1327         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1328         addr = tcp_sock->dest;
1329         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1330
1331         ZERO_STRUCT(addr);
1332         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1333         vnn = find_public_ip_vnn(ctdb, &addr);
1334         if (vnn == NULL) {
1335                 switch (addr.sa.sa_family) {
1336                 case AF_INET:
1337                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1338                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1339                                         ctdb_addr_to_str(&addr)));
1340                         }
1341                         break;
1342                 case AF_INET6:
1343                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1344                                 ctdb_addr_to_str(&addr)));
1345                         break;
1346                 default:
1347                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1348                 }
1349
1350                 return 0;
1351         }
1352
1353         if (vnn->pnn != ctdb->pnn) {
1354                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1355                         ctdb_addr_to_str(&addr),
1356                         client_id, client->pid));
1357                 /* failing this call will tell smbd to die */
1358                 return -1;
1359         }
1360
1361         ip = talloc(client, struct ctdb_client_ip);
1362         CTDB_NO_MEMORY(ctdb, ip);
1363
1364         ip->ctdb      = ctdb;
1365         ip->addr      = addr;
1366         ip->client_id = client_id;
1367         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1368         DLIST_ADD(ctdb->client_ip_list, ip);
1369
1370         tcp = talloc(client, struct ctdb_tcp_list);
1371         CTDB_NO_MEMORY(ctdb, tcp);
1372
1373         tcp->connection.src_addr = tcp_sock->src;
1374         tcp->connection.dst_addr = tcp_sock->dest;
1375
1376         DLIST_ADD(client->tcp_list, tcp);
1377
1378         t.src  = tcp_sock->src;
1379         t.dest = tcp_sock->dest;
1380
1381         data.dptr = (uint8_t *)&t;
1382         data.dsize = sizeof(t);
1383
1384         switch (addr.sa.sa_family) {
1385         case AF_INET:
1386                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1387                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1388                         ctdb_addr_to_str(&tcp_sock->src),
1389                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1390                 break;
1391         case AF_INET6:
1392                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1393                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1394                         ctdb_addr_to_str(&tcp_sock->src),
1395                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1396                 break;
1397         default:
1398                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1399         }
1400
1401
1402         /* tell all nodes about this tcp connection */
1403         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1404                                        CTDB_CONTROL_TCP_ADD,
1405                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1406         if (ret != 0) {
1407                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1408                 return -1;
1409         }
1410
1411         return 0;
1412 }
1413
1414 /*
1415   find a tcp address on a list
1416  */
1417 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1418                                            struct ctdb_tcp_connection *tcp)
1419 {
1420         int i;
1421
1422         if (array == NULL) {
1423                 return NULL;
1424         }
1425
1426         for (i=0;i<array->num;i++) {
1427                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1428                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1429                         return &array->connections[i];
1430                 }
1431         }
1432         return NULL;
1433 }
1434
1435 /*
1436   called by a daemon to inform us of a TCP connection that one of its
1437   clients managing that should tickled with an ACK when IP takeover is
1438   done
1439  */
1440 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1441 {
1442         struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1443         struct ctdb_tcp_array *tcparray;
1444         struct ctdb_tcp_connection tcp;
1445         struct ctdb_vnn *vnn;
1446
1447         vnn = find_public_ip_vnn(ctdb, &p->dest);
1448         if (vnn == NULL) {
1449                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1450                         ctdb_addr_to_str(&p->dest)));
1451
1452                 return -1;
1453         }
1454
1455
1456         tcparray = vnn->tcp_array;
1457
1458         /* If this is the first tickle */
1459         if (tcparray == NULL) {
1460                 tcparray = talloc_size(ctdb->nodes, 
1461                         offsetof(struct ctdb_tcp_array, connections) +
1462                         sizeof(struct ctdb_tcp_connection) * 1);
1463                 CTDB_NO_MEMORY(ctdb, tcparray);
1464                 vnn->tcp_array = tcparray;
1465
1466                 tcparray->num = 0;
1467                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1468                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1469
1470                 tcparray->connections[tcparray->num].src_addr = p->src;
1471                 tcparray->connections[tcparray->num].dst_addr = p->dest;
1472                 tcparray->num++;
1473                 return 0;
1474         }
1475
1476
1477         /* Do we already have this tickle ?*/
1478         tcp.src_addr = p->src;
1479         tcp.dst_addr = p->dest;
1480         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1481                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1482                         ctdb_addr_to_str(&tcp.dst_addr),
1483                         ntohs(tcp.dst_addr.ip.sin_port),
1484                         vnn->pnn));
1485                 return 0;
1486         }
1487
1488         /* A new tickle, we must add it to the array */
1489         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1490                                         struct ctdb_tcp_connection,
1491                                         tcparray->num+1);
1492         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1493
1494         vnn->tcp_array = tcparray;
1495         tcparray->connections[tcparray->num].src_addr = p->src;
1496         tcparray->connections[tcparray->num].dst_addr = p->dest;
1497         tcparray->num++;
1498                                 
1499         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1500                 ctdb_addr_to_str(&tcp.dst_addr),
1501                 ntohs(tcp.dst_addr.ip.sin_port),
1502                 vnn->pnn));
1503
1504         return 0;
1505 }
1506
1507
1508 /*
1509   called by a daemon to inform us of a TCP connection that one of its
1510   clients managing that should tickled with an ACK when IP takeover is
1511   done
1512  */
1513 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1514 {
1515         struct ctdb_tcp_connection *tcpp;
1516         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1517
1518         if (vnn == NULL) {
1519                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1520                         ctdb_addr_to_str(&conn->dst_addr)));
1521                 return;
1522         }
1523
1524         /* if the array is empty we cant remove it
1525            and we dont need to do anything
1526          */
1527         if (vnn->tcp_array == NULL) {
1528                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1529                         ctdb_addr_to_str(&conn->dst_addr),
1530                         ntohs(conn->dst_addr.ip.sin_port)));
1531                 return;
1532         }
1533
1534
1535         /* See if we know this connection
1536            if we dont know this connection  then we dont need to do anything
1537          */
1538         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1539         if (tcpp == NULL) {
1540                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1541                         ctdb_addr_to_str(&conn->dst_addr),
1542                         ntohs(conn->dst_addr.ip.sin_port)));
1543                 return;
1544         }
1545
1546
1547         /* We need to remove this entry from the array.
1548            Instead of allocating a new array and copying data to it
1549            we cheat and just copy the last entry in the existing array
1550            to the entry that is to be removed and just shring the 
1551            ->num field
1552          */
1553         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1554         vnn->tcp_array->num--;
1555
1556         /* If we deleted the last entry we also need to remove the entire array
1557          */
1558         if (vnn->tcp_array->num == 0) {
1559                 talloc_free(vnn->tcp_array);
1560                 vnn->tcp_array = NULL;
1561         }               
1562
1563         vnn->tcp_update_needed = true;
1564
1565         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1566                 ctdb_addr_to_str(&conn->src_addr),
1567                 ntohs(conn->src_addr.ip.sin_port)));
1568 }
1569
1570
/*
  called when a daemon restarts.  The intent is to send all tickles for
  all public addresses we are serving to the new node right away; that
  is not implemented yet, so this currently does nothing and returns 0.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
        /* XXX here we should send all tickles we are serving to the new node */
        (void)ctdb;
        (void)vnn;

        return 0;
}
1580
1581
1582 /*
1583   called when a client structure goes away - hook to remove
1584   elements from the tcp_list in all daemons
1585  */
1586 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1587 {
1588         while (client->tcp_list) {
1589                 struct ctdb_tcp_list *tcp = client->tcp_list;
1590                 DLIST_REMOVE(client->tcp_list, tcp);
1591                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1592         }
1593 }
1594
1595
1596 /*
1597   release all IPs on shutdown
1598  */
1599 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1600 {
1601         struct ctdb_vnn *vnn;
1602
1603         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1604                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1605                         ctdb_vnn_unassign_iface(ctdb, vnn);
1606                         continue;
1607                 }
1608                 if (!vnn->iface) {
1609                         continue;
1610                 }
1611                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1612                                   ctdb_vnn_iface_string(vnn),
1613                                   ctdb_addr_to_str(&vnn->public_address),
1614                                   vnn->public_netmask_bits);
1615                 release_kill_clients(ctdb, &vnn->public_address);
1616                 ctdb_vnn_unassign_iface(ctdb, vnn);
1617         }
1618 }
1619
1620
1621 /*
1622   get list of public IPs
1623  */
1624 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1625                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1626 {
1627         int i, num, len;
1628         struct ctdb_all_public_ips *ips;
1629         struct ctdb_vnn *vnn;
1630
1631         /* count how many public ip structures we have */
1632         num = 0;
1633         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1634                 num++;
1635         }
1636
1637         len = offsetof(struct ctdb_all_public_ips, ips) + 
1638                 num*sizeof(struct ctdb_public_ip);
1639         ips = talloc_zero_size(outdata, len);
1640         CTDB_NO_MEMORY(ctdb, ips);
1641
1642         outdata->dsize = len;
1643         outdata->dptr  = (uint8_t *)ips;
1644
1645         ips->num = num;
1646         i = 0;
1647         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1648                 ips->ips[i].pnn  = vnn->pnn;
1649                 ips->ips[i].addr = vnn->public_address;
1650                 i++;
1651         }
1652
1653         return 0;
1654 }
1655
1656
1657 /*
1658   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
1659  */
1660 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
1661                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1662 {
1663         int i, num, len;
1664         struct ctdb_all_public_ipsv4 *ips;
1665         struct ctdb_vnn *vnn;
1666
1667         /* count how many public ip structures we have */
1668         num = 0;
1669         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1670                 if (vnn->public_address.sa.sa_family != AF_INET) {
1671                         continue;
1672                 }
1673                 num++;
1674         }
1675
1676         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
1677                 num*sizeof(struct ctdb_public_ipv4);
1678         ips = talloc_zero_size(outdata, len);
1679         CTDB_NO_MEMORY(ctdb, ips);
1680
1681         outdata->dsize = len;
1682         outdata->dptr  = (uint8_t *)ips;
1683
1684         ips->num = num;
1685         i = 0;
1686         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1687                 if (vnn->public_address.sa.sa_family != AF_INET) {
1688                         continue;
1689                 }
1690                 ips->ips[i].pnn = vnn->pnn;
1691                 ips->ips[i].sin = vnn->public_address.ip;
1692                 i++;
1693         }
1694
1695         return 0;
1696 }
1697
1698
1699 /* 
1700    structure containing the listening socket and the list of tcp connections
1701    that the ctdb daemon is to kill
1702 */
1703 struct ctdb_kill_tcp {
1704         struct ctdb_vnn *vnn;
1705         struct ctdb_context *ctdb;
1706         int capture_fd;
1707         struct fd_event *fde;
1708         trbt_tree_t *connections;
1709         void *private_data;
1710 };
1711
1712 /*
1713   a tcp connection that is to be killed
1714  */
1715 struct ctdb_killtcp_con {
1716         ctdb_sock_addr src_addr;
1717         ctdb_sock_addr dst_addr;
1718         int count;
1719         struct ctdb_kill_tcp *killtcp;
1720 };
1721
1722 /* this function is used to create a key to represent this socketpair
1723    in the killtcp tree.
1724    this key is used to insert and lookup matching socketpairs that are
1725    to be tickled and RST
1726 */
1727 #define KILLTCP_KEYLEN  10
1728 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
1729 {
1730         static uint32_t key[KILLTCP_KEYLEN];
1731
1732         bzero(key, sizeof(key));
1733
1734         if (src->sa.sa_family != dst->sa.sa_family) {
1735                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
1736                 return key;
1737         }
1738         
1739         switch (src->sa.sa_family) {
1740         case AF_INET:
1741                 key[0]  = dst->ip.sin_addr.s_addr;
1742                 key[1]  = src->ip.sin_addr.s_addr;
1743                 key[2]  = dst->ip.sin_port;
1744                 key[3]  = src->ip.sin_port;
1745                 break;
1746         case AF_INET6:
1747                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
1748                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
1749                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
1750                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
1751                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
1752                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
1753                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
1754                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
1755                 key[8]  = dst->ip6.sin6_port;
1756                 key[9]  = src->ip6.sin6_port;
1757                 break;
1758         default:
1759                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
1760                 return key;
1761         }
1762
1763         return key;
1764 }
1765
1766 /*
1767   called when we get a read event on the raw socket
1768  */
1769 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
1770                                 uint16_t flags, void *private_data)
1771 {
1772         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1773         struct ctdb_killtcp_con *con;
1774         ctdb_sock_addr src, dst;
1775         uint32_t ack_seq, seq;
1776
1777         if (!(flags & EVENT_FD_READ)) {
1778                 return;
1779         }
1780
1781         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
1782                                 killtcp->private_data,
1783                                 &src, &dst,
1784                                 &ack_seq, &seq) != 0) {
1785                 /* probably a non-tcp ACK packet */
1786                 return;
1787         }
1788
1789         /* check if we have this guy in our list of connections
1790            to kill
1791         */
1792         con = trbt_lookuparray32(killtcp->connections, 
1793                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
1794         if (con == NULL) {
1795                 /* no this was some other packet we can just ignore */
1796                 return;
1797         }
1798
1799         /* This one has been tickled !
1800            now reset him and remove him from the list.
1801          */
1802         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
1803                 ntohs(con->dst_addr.ip.sin_port),
1804                 ctdb_addr_to_str(&con->src_addr),
1805                 ntohs(con->src_addr.ip.sin_port)));
1806
1807         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
1808         talloc_free(con);
1809 }
1810
1811
1812 /* when traversing the list of all tcp connections to send tickle acks to
1813    (so that we can capture the ack coming back and kill the connection
1814     by a RST)
1815    this callback is called for each connection we are currently trying to kill
1816 */
1817 static void tickle_connection_traverse(void *param, void *data)
1818 {
1819         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
1820
1821         /* have tried too many times, just give up */
1822         if (con->count >= 5) {
1823                 talloc_free(con);
1824                 return;
1825         }
1826
1827         /* othervise, try tickling it again */
1828         con->count++;
1829         ctdb_sys_send_tcp(
1830                 (ctdb_sock_addr *)&con->dst_addr,
1831                 (ctdb_sock_addr *)&con->src_addr,
1832                 0, 0, 0);
1833 }
1834
1835
1836 /* 
1837    called every second until all sentenced connections have been reset
1838  */
1839 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
1840                                               struct timeval t, void *private_data)
1841 {
1842         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1843
1844
1845         /* loop over all connections sending tickle ACKs */
1846         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
1847
1848
1849         /* If there are no more connections to kill we can remove the
1850            entire killtcp structure
1851          */
1852         if ( (killtcp->connections == NULL) || 
1853              (killtcp->connections->root == NULL) ) {
1854                 talloc_free(killtcp);
1855                 return;
1856         }
1857
1858         /* try tickling them again in a seconds time
1859          */
1860         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
1861                         ctdb_tickle_sentenced_connections, killtcp);
1862 }
1863
1864 /*
1865   destroy the killtcp structure
1866  */
1867 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
1868 {
1869         killtcp->vnn->killtcp = NULL;
1870         return 0;
1871 }
1872
1873
/* Insert callback for the killtcp tree.  Nothing fancy: always replace
   any existing connection structure (data) with the new one (parm).

   The old one is deliberately not freed here; it is talloc_stolen by
   the same node in the tree anyway and will be deleted when the new
   data is deleted.
*/
static void *add_killtcp_callback(void *parm, void *data)
{
	void *replacement = parm;

	return replacement;
}
1885
1886 /*
1887   add a tcp socket to the list of connections we want to RST
1888  */
1889 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
1890                                        ctdb_sock_addr *s,
1891                                        ctdb_sock_addr *d)
1892 {
1893         ctdb_sock_addr src, dst;
1894         struct ctdb_kill_tcp *killtcp;
1895         struct ctdb_killtcp_con *con;
1896         struct ctdb_vnn *vnn;
1897
1898         ctdb_canonicalize_ip(s, &src);
1899         ctdb_canonicalize_ip(d, &dst);
1900
1901         vnn = find_public_ip_vnn(ctdb, &dst);
1902         if (vnn == NULL) {
1903                 vnn = find_public_ip_vnn(ctdb, &src);
1904         }
1905         if (vnn == NULL) {
1906                 /* if it is not a public ip   it could be our 'single ip' */
1907                 if (ctdb->single_ip_vnn) {
1908                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
1909                                 vnn = ctdb->single_ip_vnn;
1910                         }
1911                 }
1912         }
1913         if (vnn == NULL) {
1914                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
1915                 return -1;
1916         }
1917
1918         killtcp = vnn->killtcp;
1919         
1920         /* If this is the first connection to kill we must allocate
1921            a new structure
1922          */
1923         if (killtcp == NULL) {
1924                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
1925                 CTDB_NO_MEMORY(ctdb, killtcp);
1926
1927                 killtcp->vnn         = vnn;
1928                 killtcp->ctdb        = ctdb;
1929                 killtcp->capture_fd  = -1;
1930                 killtcp->connections = trbt_create(killtcp, 0);
1931
1932                 vnn->killtcp         = killtcp;
1933                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
1934         }
1935
1936
1937
1938         /* create a structure that describes this connection we want to
1939            RST and store it in killtcp->connections
1940         */
1941         con = talloc(killtcp, struct ctdb_killtcp_con);
1942         CTDB_NO_MEMORY(ctdb, con);
1943         con->src_addr = src;
1944         con->dst_addr = dst;
1945         con->count    = 0;
1946         con->killtcp  = killtcp;
1947
1948
1949         trbt_insertarray32_callback(killtcp->connections,
1950                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
1951                         add_killtcp_callback, con);
1952
1953         /* 
1954            If we dont have a socket to listen on yet we must create it
1955          */
1956         if (killtcp->capture_fd == -1) {
1957                 const char *iface = ctdb_vnn_iface_string(vnn);
1958                 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
1959                 if (killtcp->capture_fd == -1) {
1960                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
1961                                           "socket on iface '%s' for killtcp (%s)\n",
1962                                           iface, strerror(errno)));
1963                         goto failed;
1964                 }
1965         }
1966
1967
1968         if (killtcp->fde == NULL) {
1969                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
1970                                             EVENT_FD_READ | EVENT_FD_AUTOCLOSE, 
1971                                             capture_tcp_handler, killtcp);
1972
1973                 /* We also need to set up some events to tickle all these connections
1974                    until they are all reset
1975                 */
1976                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
1977                                 ctdb_tickle_sentenced_connections, killtcp);
1978         }
1979
1980         /* tickle him once now */
1981         ctdb_sys_send_tcp(
1982                 &con->dst_addr,
1983                 &con->src_addr,
1984                 0, 0, 0);
1985
1986         return 0;
1987
1988 failed:
1989         talloc_free(vnn->killtcp);
1990         vnn->killtcp = NULL;
1991         return -1;
1992 }
1993
1994 /*
1995   kill a TCP connection.
1996  */
1997 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
1998 {
1999         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2000
2001         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2002 }
2003
2004 /*
2005   called by a daemon to inform us of the entire list of TCP tickles for
2006   a particular public address.
2007   this control should only be sent by the node that is currently serving
2008   that public address.
2009  */
2010 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2011 {
2012         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2013         struct ctdb_tcp_array *tcparray;
2014         struct ctdb_vnn *vnn;
2015
2016         /* We must at least have tickles.num or else we cant verify the size
2017            of the received data blob
2018          */
2019         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2020                                         tickles.connections)) {
2021                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2022                 return -1;
2023         }
2024
2025         /* verify that the size of data matches what we expect */
2026         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2027                                 tickles.connections)
2028                          + sizeof(struct ctdb_tcp_connection)
2029                                  * list->tickles.num) {
2030                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2031                 return -1;
2032         }       
2033
2034         vnn = find_public_ip_vnn(ctdb, &list->addr);
2035         if (vnn == NULL) {
2036                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
2037                         ctdb_addr_to_str(&list->addr)));
2038
2039                 return 1;
2040         }
2041
2042         /* remove any old ticklelist we might have */
2043         talloc_free(vnn->tcp_array);
2044         vnn->tcp_array = NULL;
2045
2046         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2047         CTDB_NO_MEMORY(ctdb, tcparray);
2048
2049         tcparray->num = list->tickles.num;
2050
2051         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2052         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2053
2054         memcpy(tcparray->connections, &list->tickles.connections[0], 
2055                sizeof(struct ctdb_tcp_connection)*tcparray->num);
2056
2057         /* We now have a new fresh tickle list array for this vnn */
2058         vnn->tcp_array = talloc_steal(vnn, tcparray);
2059         
2060         return 0;
2061 }
2062
2063 /*
2064   called to return the full list of tickles for the puclic address associated 
2065   with the provided vnn
2066  */
2067 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2068 {
2069         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2070         struct ctdb_control_tcp_tickle_list *list;
2071         struct ctdb_tcp_array *tcparray;
2072         int num;
2073         struct ctdb_vnn *vnn;
2074
2075         vnn = find_public_ip_vnn(ctdb, addr);
2076         if (vnn == NULL) {
2077                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
2078                         ctdb_addr_to_str(addr)));
2079
2080                 return 1;
2081         }
2082
2083         tcparray = vnn->tcp_array;
2084         if (tcparray) {
2085                 num = tcparray->num;
2086         } else {
2087                 num = 0;
2088         }
2089
2090         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2091                                 tickles.connections)
2092                         + sizeof(struct ctdb_tcp_connection) * num;
2093
2094         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2095         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2096         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2097
2098         list->addr = *addr;
2099         list->tickles.num = num;
2100         if (num) {
2101                 memcpy(&list->tickles.connections[0], tcparray->connections, 
2102                         sizeof(struct ctdb_tcp_connection) * num);
2103         }
2104
2105         return 0;
2106 }
2107
2108
2109 /*
2110   set the list of all tcp tickles for a public address
2111  */
2112 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
2113                               struct timeval timeout, uint32_t destnode, 
2114                               ctdb_sock_addr *addr,
2115                               struct ctdb_tcp_array *tcparray)
2116 {
2117         int ret, num;
2118         TDB_DATA data;
2119         struct ctdb_control_tcp_tickle_list *list;
2120
2121         if (tcparray) {
2122                 num = tcparray->num;
2123         } else {
2124                 num = 0;
2125         }
2126
2127         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2128                                 tickles.connections) +
2129                         sizeof(struct ctdb_tcp_connection) * num;
2130         data.dptr = talloc_size(ctdb, data.dsize);
2131         CTDB_NO_MEMORY(ctdb, data.dptr);
2132
2133         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2134         list->addr = *addr;
2135         list->tickles.num = num;
2136         if (tcparray) {
2137                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2138         }
2139
2140         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
2141                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2142                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2143         if (ret != 0) {
2144                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2145                 return -1;
2146         }
2147
2148         talloc_free(data.dptr);
2149
2150         return ret;
2151 }
2152
2153
2154 /*
2155   perform tickle updates if required
2156  */
2157 static void ctdb_update_tcp_tickles(struct event_context *ev, 
2158                                 struct timed_event *te, 
2159                                 struct timeval t, void *private_data)
2160 {
2161         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2162         int ret;
2163         struct ctdb_vnn *vnn;
2164
2165         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2166                 /* we only send out updates for public addresses that 
2167                    we have taken over
2168                  */
2169                 if (ctdb->pnn != vnn->pnn) {
2170                         continue;
2171                 }
2172                 /* We only send out the updates if we need to */
2173                 if (!vnn->tcp_update_needed) {
2174                         continue;
2175                 }
2176                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
2177                                 TAKEOVER_TIMEOUT(),
2178                                 CTDB_BROADCAST_CONNECTED,
2179                                 &vnn->public_address,
2180                                 vnn->tcp_array);
2181                 if (ret != 0) {
2182                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2183                                 ctdb_addr_to_str(&vnn->public_address)));
2184                 }
2185         }
2186
2187         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2188                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2189                              ctdb_update_tcp_tickles, ctdb);
2190 }               
2191         
2192
2193 /*
2194   start periodic update of tcp tickles
2195  */
2196 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2197 {
2198         ctdb->tickle_update_context = talloc_new(ctdb);
2199
2200         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2201                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2202                              ctdb_update_tcp_tickles, ctdb);
2203 }
2204
2205
2206
2207
2208 struct control_gratious_arp {
2209         struct ctdb_context *ctdb;
2210         ctdb_sock_addr addr;
2211         const char *iface;
2212         int count;
2213 };
2214
2215 /*
2216   send a control_gratuitous arp
2217  */
2218 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
2219                                   struct timeval t, void *private_data)
2220 {
2221         int ret;
2222         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2223                                                         struct control_gratious_arp);
2224
2225         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2226         if (ret != 0) {
2227                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2228                                  arp->iface, strerror(errno)));
2229         }
2230
2231
2232         arp->count++;
2233         if (arp->count == CTDB_ARP_REPEAT) {
2234                 talloc_free(arp);
2235                 return;
2236         }
2237
2238         event_add_timed(arp->ctdb->ev, arp, 
2239                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
2240                         send_gratious_arp, arp);
2241 }
2242
2243
2244 /*
2245   send a gratious arp 
2246  */
2247 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2248 {
2249         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2250         struct control_gratious_arp *arp;
2251
2252         /* verify the size of indata */
2253         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2254                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2255                                  (unsigned)indata.dsize, 
2256                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2257                 return -1;
2258         }
2259         if (indata.dsize != 
2260                 ( offsetof(struct ctdb_control_gratious_arp, iface)
2261                 + gratious_arp->len ) ){
2262
2263                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2264                         "but should be %u bytes\n", 
2265                          (unsigned)indata.dsize, 
2266                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2267                 return -1;
2268         }
2269
2270
2271         arp = talloc(ctdb, struct control_gratious_arp);
2272         CTDB_NO_MEMORY(ctdb, arp);
2273
2274         arp->ctdb  = ctdb;
2275         arp->addr   = gratious_arp->addr;
2276         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2277         CTDB_NO_MEMORY(ctdb, arp->iface);
2278         arp->count = 0;
2279         
2280         event_add_timed(arp->ctdb->ev, arp, 
2281                         timeval_zero(), send_gratious_arp, arp);
2282
2283         return 0;
2284 }
2285
2286 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2287 {
2288         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2289         int ret;
2290
2291         /* verify the size of indata */
2292         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2293                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2294                 return -1;
2295         }
2296         if (indata.dsize != 
2297                 ( offsetof(struct ctdb_control_ip_iface, iface)
2298                 + pub->len ) ){
2299
2300                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2301                         "but should be %u bytes\n", 
2302                          (unsigned)indata.dsize, 
2303                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2304                 return -1;
2305         }
2306
2307         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2308
2309         if (ret != 0) {
2310                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2311                 return -1;
2312         }
2313
2314         return 0;
2315 }
2316
/*
  Called when the releaseip event finishes for del_public_address.
  private_data is the temporary talloc context created for the event;
  it is no longer needed and is freed here.  The status is ignored.
 */
static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
				void *private_data)
{
	void *event_ctx = private_data;

	talloc_free(event_ctx);
}
2325
2326 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2327 {
2328         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2329         struct ctdb_vnn *vnn;
2330         int ret;
2331
2332         /* verify the size of indata */
2333         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2334                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2335                 return -1;
2336         }
2337         if (indata.dsize != 
2338                 ( offsetof(struct ctdb_control_ip_iface, iface)
2339                 + pub->len ) ){
2340
2341                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2342                         "but should be %u bytes\n", 
2343                          (unsigned)indata.dsize, 
2344                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2345                 return -1;
2346         }
2347
2348         /* walk over all public addresses until we find a match */
2349         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2350                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2351                         TALLOC_CTX *mem_ctx;
2352
2353                         DLIST_REMOVE(ctdb->vnn, vnn);
2354                         if (vnn->iface == NULL) {
2355                                 talloc_free(vnn);
2356                                 return 0;
2357                         }
2358
2359                         mem_ctx = talloc_new(ctdb);
2360                         ret = ctdb_event_script_callback(ctdb, 
2361                                          mem_ctx, delete_ip_callback, mem_ctx,
2362                                          false,
2363                                          CTDB_EVENT_RELEASE_IP,
2364                                          "%s %s %u",
2365                                          ctdb_vnn_iface_string(vnn),
2366                                          ctdb_addr_to_str(&vnn->public_address),
2367                                          vnn->public_netmask_bits);
2368                         ctdb_vnn_unassign_iface(ctdb, vnn);
2369                         talloc_free(vnn);
2370                         if (ret != 0) {
2371                                 return -1;
2372                         }
2373                         return 0;
2374                 }
2375         }
2376
2377         return -1;
2378 }
2379