Merge commit 'rusty/vacuum-fix-master'
[metze/ctdb/wip.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/tevent/tevent.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/* timeout value for takeover operations, built from the takeover_timeout
   tunable; the expansion needs a variable named 'ctdb' in scope.
   NOTE(review): presumably "now + takeover_timeout seconds" - confirm
   against timeval_current_ofs() */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/* seconds part of the delay between two runs of ctdb_control_send_arp() */
#define CTDB_ARP_INTERVAL 1
/* number of runs of ctdb_control_send_arp() after which its state is freed */
#define CTDB_ARP_REPEAT   3
35
/*
  a local network interface that public addresses can be assigned to
  entries live on the ctdb->ifaces list
 */
struct ctdb_iface {
        /* list linkage used by DLIST_ADD */
        struct ctdb_iface *prev, *next;
        /* interface name, compared with strcmp() when looking up an entry */
        const char *name;
        /* only interfaces with link_up set are picked for an address */
        bool link_up;
        /* number of public addresses currently assigned to this interface */
        uint32_t references;
};
42
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
44 {
45         if (vnn->iface) {
46                 return vnn->iface->name;
47         }
48
49         return "__none__";
50 }
51
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
53 {
54         struct ctdb_iface *i;
55
56         /* Verify that we dont have an entry for this ip yet */
57         for (i=ctdb->ifaces;i;i=i->next) {
58                 if (strcmp(i->name, iface) == 0) {
59                         return 0;
60                 }
61         }
62
63         /* create a new structure for this interface */
64         i = talloc_zero(ctdb, struct ctdb_iface);
65         CTDB_NO_MEMORY_FATAL(ctdb, i);
66         i->name = talloc_strdup(i, iface);
67         CTDB_NO_MEMORY(ctdb, i->name);
68         i->link_up = false;
69
70         DLIST_ADD(ctdb->ifaces, i);
71
72         return 0;
73 }
74
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
76                                           const char *iface)
77 {
78         struct ctdb_iface *i;
79
80         /* Verify that we dont have an entry for this ip yet */
81         for (i=ctdb->ifaces;i;i=i->next) {
82                 if (strcmp(i->name, iface) == 0) {
83                         return i;
84                 }
85         }
86
87         return NULL;
88 }
89
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
91                                               struct ctdb_vnn *vnn)
92 {
93         int i;
94         struct ctdb_iface *cur = NULL;
95         struct ctdb_iface *best = NULL;
96
97         for (i=0; vnn->ifaces[i]; i++) {
98
99                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
100                 if (cur == NULL) {
101                         continue;
102                 }
103
104                 if (!cur->link_up) {
105                         continue;
106                 }
107
108                 if (best == NULL) {
109                         best = cur;
110                         continue;
111                 }
112
113                 if (cur->references < best->references) {
114                         best = cur;
115                         continue;
116                 }
117         }
118
119         return best;
120 }
121
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123                                      struct ctdb_vnn *vnn)
124 {
125         struct ctdb_iface *best = NULL;
126
127         if (vnn->iface) {
128                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129                                    "still assigned to iface '%s'\n",
130                                    ctdb_addr_to_str(&vnn->public_address),
131                                    ctdb_vnn_iface_string(vnn)));
132                 return 0;
133         }
134
135         best = ctdb_vnn_best_iface(ctdb, vnn);
136         if (best == NULL) {
137                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138                                   "cannot assign to iface any iface\n",
139                                   ctdb_addr_to_str(&vnn->public_address)));
140                 return -1;
141         }
142
143         vnn->iface = best;
144         best->references++;
145         vnn->pnn = ctdb->pnn;
146
147         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148                            "now assigned to iface '%s' refs[%d]\n",
149                            ctdb_addr_to_str(&vnn->public_address),
150                            ctdb_vnn_iface_string(vnn),
151                            best->references));
152         return 0;
153 }
154
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156                                     struct ctdb_vnn *vnn)
157 {
158         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159                            "now unassigned (old iface '%s' refs[%d])\n",
160                            ctdb_addr_to_str(&vnn->public_address),
161                            ctdb_vnn_iface_string(vnn),
162                            vnn->iface?vnn->iface->references:0));
163         if (vnn->iface) {
164                 vnn->iface->references--;
165         }
166         vnn->iface = NULL;
167         if (vnn->pnn == ctdb->pnn) {
168                 vnn->pnn = -1;
169         }
170 }
171
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173                                struct ctdb_vnn *vnn)
174 {
175         int i;
176
177         if (vnn->iface && vnn->iface->link_up) {
178                 return true;
179         }
180
181         for (i=0; vnn->ifaces[i]; i++) {
182                 struct ctdb_iface *cur;
183
184                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
185                 if (cur == NULL) {
186                         continue;
187                 }
188
189                 if (cur->link_up) {
190                         return true;
191                 }
192         }
193
194         return false;
195 }
196
/*
  state for the repeated gratuitous arp / tcp tickle announcements
  handled by ctdb_control_send_arp()
 */
struct ctdb_takeover_arp {
        struct ctdb_context *ctdb;
        /* number of times ctdb_control_send_arp() has run for this state */
        uint32_t count;
        /* the public address that is announced */
        ctdb_sock_addr addr;
        /* tcp connections to send tickle acks for, taken over from
           vnn->tcp_array; may be NULL */
        struct ctdb_tcp_array *tcparray;
        /* the public address entry, used for the interface name and
           as owner of the takeover_ctx the timed events hang off */
        struct ctdb_vnn *vnn;
};
204
205
/*
  lists of tcp endpoints
  each entry is one tcp connection on a doubly linked list
 */
struct ctdb_tcp_list {
        /* list linkage */
        struct ctdb_tcp_list *prev, *next;
        /* the connection itself; ctdb_tcp_connection carries src_addr
           and dst_addr */
        struct ctdb_tcp_connection connection;
};
213
/*
  list of clients to kill on IP release
  entries live on ctdb->client_ip_list and are walked by
  release_kill_clients()
 */
struct ctdb_client_ip {
        /* list linkage */
        struct ctdb_client_ip *prev, *next;
        struct ctdb_context *ctdb;
        /* the address the client registered with */
        ctdb_sock_addr addr;
        /* id used with ctdb_reqid_find() to get at the struct ctdb_client */
        uint32_t client_id;
};
223
224
225 /*
226   send a gratuitous arp
227  */
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
229                                   struct timeval t, void *private_data)
230 {
231         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
232                                                         struct ctdb_takeover_arp);
233         int i, ret;
234         struct ctdb_tcp_array *tcparray;
235         const char *iface = ctdb_vnn_iface_string(arp->vnn);
236
237         ret = ctdb_sys_send_arp(&arp->addr, iface);
238         if (ret != 0) {
239                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240                                   iface, strerror(errno)));
241         }
242
243         tcparray = arp->tcparray;
244         if (tcparray) {
245                 for (i=0;i<tcparray->num;i++) {
246                         struct ctdb_tcp_connection *tcon;
247
248                         tcon = &tcparray->connections[i];
249                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
251                                 ctdb_addr_to_str(&tcon->src_addr),
252                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253                         ret = ctdb_sys_send_tcp(
254                                 &tcon->src_addr, 
255                                 &tcon->dst_addr,
256                                 0, 0, 0);
257                         if (ret != 0) {
258                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259                                         ctdb_addr_to_str(&tcon->src_addr)));
260                         }
261                 }
262         }
263
264         arp->count++;
265
266         if (arp->count == CTDB_ARP_REPEAT) {
267                 talloc_free(arp);
268                 return;
269         }
270
271         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
272                         timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
273                         ctdb_control_send_arp, arp);
274 }
275
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277                                        struct ctdb_vnn *vnn)
278 {
279         struct ctdb_takeover_arp *arp;
280         struct ctdb_tcp_array *tcparray;
281
282         if (!vnn->takeover_ctx) {
283                 vnn->takeover_ctx = talloc_new(vnn);
284                 if (!vnn->takeover_ctx) {
285                         return -1;
286                 }
287         }
288
289         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
290         if (!arp) {
291                 return -1;
292         }
293
294         arp->ctdb = ctdb;
295         arp->addr = vnn->public_address;
296         arp->vnn  = vnn;
297
298         tcparray = vnn->tcp_array;
299         if (tcparray) {
300                 /* add all of the known tcp connections for this IP to the
301                    list of tcp connections to send tickle acks for */
302                 arp->tcparray = talloc_steal(arp, tcparray);
303
304                 vnn->tcp_array = NULL;
305                 vnn->tcp_update_needed = true;
306         }
307
308         event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309                         timeval_zero(), ctdb_control_send_arp, arp);
310
311         return 0;
312 }
313
/*
  state handed to release_ip_callback() while the releaseip event runs
 */
struct takeover_callback_state {
        /* the control request that is answered from the callback */
        struct ctdb_req_control *c;
        /* copy of the address that is being released */
        ctdb_sock_addr *addr;
        /* the public address entry that is unassigned in the callback */
        struct ctdb_vnn *vnn;
};
319
/*
  state handed to ctdb_do_takeip_callback() while the takeip event runs
 */
struct ctdb_do_takeip_state {
        /* the control request that is answered from the callback */
        struct ctdb_req_control *c;
        /* the public address entry that is being taken over */
        struct ctdb_vnn *vnn;
};
324
325 /*
326   called when takeip event finishes
327  */
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
329                                     void *private_data)
330 {
331         struct ctdb_do_takeip_state *state =
332                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
333         int32_t ret;
334
335         if (status != 0) {
336                 if (status == -ETIME) {
337                         ctdb_ban_self(ctdb);
338                 }
339                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
340                                  ctdb_addr_to_str(&state->vnn->public_address),
341                                  ctdb_vnn_iface_string(state->vnn)));
342                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
343                 talloc_free(state);
344                 return;
345         }
346
347         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
348         if (ret != 0) {
349                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
350                 talloc_free(state);
351                 return;
352         }
353
354         /* the control succeeded */
355         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
356         talloc_free(state);
357         return;
358 }
359
360 /*
361   take over an ip address
362  */
363 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
364                               struct ctdb_req_control *c,
365                               struct ctdb_vnn *vnn)
366 {
367         int ret;
368         struct ctdb_do_takeip_state *state;
369
370         ret = ctdb_vnn_assign_iface(ctdb, vnn);
371         if (ret != 0) {
372                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
373                                  "assin a usable interface\n",
374                                  ctdb_addr_to_str(&vnn->public_address),
375                                  vnn->public_netmask_bits));
376                 return -1;
377         }
378
379         state = talloc(vnn, struct ctdb_do_takeip_state);
380         CTDB_NO_MEMORY(ctdb, state);
381
382         state->c = talloc_steal(ctdb, c);
383         state->vnn   = vnn;
384
385         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
386                             ctdb_addr_to_str(&vnn->public_address),
387                             vnn->public_netmask_bits,
388                             ctdb_vnn_iface_string(vnn)));
389
390         ret = ctdb_event_script_callback(ctdb,
391                                          state,
392                                          ctdb_do_takeip_callback,
393                                          state,
394                                          false,
395                                          CTDB_EVENT_TAKE_IP,
396                                          "%s %s %u",
397                                          ctdb_vnn_iface_string(vnn),
398                                          ctdb_addr_to_str(&vnn->public_address),
399                                          vnn->public_netmask_bits);
400
401         if (ret != 0) {
402                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
403                         ctdb_addr_to_str(&vnn->public_address),
404                         ctdb_vnn_iface_string(vnn)));
405                 talloc_free(state);
406                 return -1;
407         }
408
409         return 0;
410 }
411
/*
  state handed to ctdb_do_updateip_callback() while the updateip event runs
 */
struct ctdb_do_updateip_state {
        /* the control request that is answered from the callback */
        struct ctdb_req_control *c;
        /* the interface the address was on before the move; it is put
           back in place when the event fails */
        struct ctdb_iface *old;
        /* the public address entry that is being moved */
        struct ctdb_vnn *vnn;
};
417
418 /*
419   called when updateip event finishes
420  */
421 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
422                                       void *private_data)
423 {
424         struct ctdb_do_updateip_state *state =
425                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
426         int32_t ret;
427
428         if (status != 0) {
429                 if (status == -ETIME) {
430                         ctdb_ban_self(ctdb);
431                 }
432                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
433                         ctdb_addr_to_str(&state->vnn->public_address),
434                         state->old->name,
435                         ctdb_vnn_iface_string(state->vnn)));
436
437                 /*
438                  * All we can do is reset the old interface
439                  * and let the next run fix it
440                  */
441                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
442                 state->vnn->iface = state->old;
443                 state->vnn->iface->references++;
444
445                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
446                 talloc_free(state);
447                 return;
448         }
449
450         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
451         if (ret != 0) {
452                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
453                 talloc_free(state);
454                 return;
455         }
456
457         /* the control succeeded */
458         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
459         talloc_free(state);
460         return;
461 }
462
463 /*
464   update (move) an ip address
465  */
466 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
467                                 struct ctdb_req_control *c,
468                                 struct ctdb_vnn *vnn)
469 {
470         int ret;
471         struct ctdb_do_updateip_state *state;
472         struct ctdb_iface *old = vnn->iface;
473
474         ctdb_vnn_unassign_iface(ctdb, vnn);
475         ret = ctdb_vnn_assign_iface(ctdb, vnn);
476         if (ret != 0) {
477                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
478                                  "assin a usable interface (old iface '%s')\n",
479                                  ctdb_addr_to_str(&vnn->public_address),
480                                  vnn->public_netmask_bits,
481                                  old->name));
482                 return -1;
483         }
484
485         if (vnn->iface == old) {
486                 DEBUG(DEBUG_ERR,("update of IP %s/%u trying to "
487                                  "assin a same interface '%s'\n",
488                                  ctdb_addr_to_str(&vnn->public_address),
489                                  vnn->public_netmask_bits,
490                                  old->name));
491                 return -1;
492         }
493
494         state = talloc(vnn, struct ctdb_do_updateip_state);
495         CTDB_NO_MEMORY(ctdb, state);
496
497         state->c = talloc_steal(ctdb, c);
498         state->old = old;
499         state->vnn = vnn;
500
501         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
502                             "interface %s to %s\n",
503                             ctdb_addr_to_str(&vnn->public_address),
504                             vnn->public_netmask_bits,
505                             old->name,
506                             ctdb_vnn_iface_string(vnn)));
507
508         ret = ctdb_event_script_callback(ctdb,
509                                          state,
510                                          ctdb_do_updateip_callback,
511                                          state,
512                                          false,
513                                          CTDB_EVENT_UPDATE_IP,
514                                          "%s %s %s %u",
515                                          state->old->name,
516                                          ctdb_vnn_iface_string(vnn),
517                                          ctdb_addr_to_str(&vnn->public_address),
518                                          vnn->public_netmask_bits);
519         if (ret != 0) {
520                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
521                                  ctdb_addr_to_str(&vnn->public_address),
522                                  old->name, ctdb_vnn_iface_string(vnn)));
523                 talloc_free(state);
524                 return -1;
525         }
526
527         return 0;
528 }
529
530 /*
531   Find the vnn of the node that has a public ip address
532   returns -1 if the address is not known as a public address
533  */
534 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
535 {
536         struct ctdb_vnn *vnn;
537
538         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
539                 if (ctdb_same_ip(&vnn->public_address, addr)) {
540                         return vnn;
541                 }
542         }
543
544         return NULL;
545 }
546
/*
  take over an ip address

  Control handler: indata carries a struct ctdb_public_ip naming the
  address and the node that should host it.

  Depending on the current state this either
   - starts a takeip event (the kernel does not have the address yet),
   - starts an updateip event (we have the address, but it should move
     to a different interface), or
   - does nothing (we have the address on a sensible interface).
  When an event was started, *async_reply is set to true and the
  control is answered from the event callback.

  returns 0 on success (including "not a public address" and the
  redundant case), -1 on error. Inconsistencies between our state and
  the kernel state make this node ban itself.
 */
int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
                                 struct ctdb_req_control *c,
                                 TDB_DATA indata,
                                 bool *async_reply)
{
        int ret;
        struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
        struct ctdb_vnn *vnn;
        bool have_ip = false;
        bool do_updateip = false;
        bool do_takeip = false;
        struct ctdb_iface *best_iface = NULL;

        /* a takeover request is only valid when it names us as the host */
        if (pip->pnn != ctdb->pnn) {
                DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
                                 "with pnn %d, but we're node %d\n",
                                 ctdb_addr_to_str(&pip->addr),
                                 pip->pnn, ctdb->pnn));
                return -1;
        }

        /* update out vnn list */
        vnn = find_public_ip_vnn(ctdb, &pip->addr);
        if (vnn == NULL) {
                DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
                        ctdb_addr_to_str(&pip->addr)));
                return 0;
        }

        /* have_ip: presumably whether the address is currently configured
           on this host (the messages below say "known to the kernel") */
        have_ip = ctdb_sys_have_ip(&pip->addr);
        best_iface = ctdb_vnn_best_iface(ctdb, vnn);
        if (best_iface == NULL) {
                DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
                                 "a usable interface (old %s, have_ip %d)\n",
                                 ctdb_addr_to_str(&vnn->public_address),
                                 vnn->public_netmask_bits,
                                 ctdb_vnn_iface_string(vnn),
                                 have_ip));
                return -1;
        }

        /* an address without interface and without owner that is already
           on the host is treated as not held, so the takeip path below
           is used (best_iface is always non-NULL at this point) */
        if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
                DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
                have_ip = false;
        }

        /* the host has the address but we never assigned an interface */
        if (vnn->iface == NULL && have_ip) {
                DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
                                  "but we have no interface assigned, has someone manually configured it?"
                                  "banning ourself\n",
                                 ctdb_addr_to_str(&vnn->public_address)));
                ctdb_ban_self(ctdb);
                return -1;
        }

        /* the host has the address but our records say another node owns it */
        if (vnn->pnn != ctdb->pnn && have_ip) {
                DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
                                  "and we have it on iface[%s], but it was assigned to node %d"
                                  "and we are node %d, banning ourself\n",
                                 ctdb_addr_to_str(&vnn->public_address),
                                 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
                ctdb_ban_self(ctdb);
                return -1;
        }

        /* decide whether the address should move to another interface:
           with link on the current one only if the best one has at least
           two references less, without link whenever the best one differs */
        if (vnn->iface) {
                if (vnn->iface->link_up) {
                        /* only move when the rebalance gains something */
                        if (vnn->iface->references > (best_iface->references + 1)) {
                                do_updateip = true;
                        }
                } else if (vnn->iface != best_iface) {
                        do_updateip = true;
                }
        }

        /* if the address is not held a plain takeip is used instead of a
           move; the interface is dropped first so that ctdb_do_takeip()
           picks a new one */
        if (!have_ip) {
                if (do_updateip) {
                        ctdb_vnn_unassign_iface(ctdb, vnn);
                        do_updateip = false;
                }
                do_takeip = true;
        }

        if (do_takeip) {
                ret = ctdb_do_takeip(ctdb, c, vnn);
                if (ret != 0) {
                        return -1;
                }
        } else if (do_updateip) {
                ret = ctdb_do_updateip(ctdb, c, vnn);
                if (ret != 0) {
                        return -1;
                }
        } else {
                /*
                 * The interface is up and the kernel known the ip
                 * => do nothing
                 */
                DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
                        ctdb_addr_to_str(&pip->addr),
                        vnn->public_netmask_bits,
                        ctdb_vnn_iface_string(vnn)));
                return 0;
        }

        /* tell ctdb_control.c that we will be replying asynchronously */
        *async_reply = true;

        return 0;
}
661
662 /*
663   takeover an ip address old v4 style
664  */
665 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
666                                 struct ctdb_req_control *c,
667                                 TDB_DATA indata, 
668                                 bool *async_reply)
669 {
670         TDB_DATA data;
671         
672         data.dsize = sizeof(struct ctdb_public_ip);
673         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
674         CTDB_NO_MEMORY(ctdb, data.dptr);
675         
676         memcpy(data.dptr, indata.dptr, indata.dsize);
677         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
678 }
679
680 /*
681   kill any clients that are registered with a IP that is being released
682  */
683 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
684 {
685         struct ctdb_client_ip *ip;
686
687         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
688                 ctdb_addr_to_str(addr)));
689
690         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
691                 ctdb_sock_addr tmp_addr;
692
693                 tmp_addr = ip->addr;
694                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
695                         ip->client_id,
696                         ctdb_addr_to_str(&ip->addr)));
697
698                 if (ctdb_same_ip(&tmp_addr, addr)) {
699                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
700                                                                      ip->client_id, 
701                                                                      struct ctdb_client);
702                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
703                                 ip->client_id,
704                                 ctdb_addr_to_str(&ip->addr),
705                                 client->pid));
706
707                         if (client->pid != 0) {
708                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
709                                         (unsigned)client->pid,
710                                         ctdb_addr_to_str(addr),
711                                         ip->client_id));
712                                 kill(client->pid, SIGKILL);
713                         }
714                 }
715         }
716 }
717
718 /*
719   called when releaseip event finishes
720  */
721 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
722                                 void *private_data)
723 {
724         struct takeover_callback_state *state = 
725                 talloc_get_type(private_data, struct takeover_callback_state);
726         TDB_DATA data;
727
728         if (status == -ETIME) {
729                 ctdb_ban_self(ctdb);
730         }
731
732         /* send a message to all clients of this node telling them
733            that the cluster has been reconfigured and they should
734            release any sockets on this IP */
735         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
736         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
737         data.dsize = strlen((char *)data.dptr)+1;
738
739         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
740
741         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
742
743         /* kill clients that have registered with this IP */
744         release_kill_clients(ctdb, state->addr);
745
746         ctdb_vnn_unassign_iface(ctdb, state->vnn);
747
748         /* the control succeeded */
749         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
750         talloc_free(state);
751 }
752
753 /*
754   release an ip address
755  */
756 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
757                                 struct ctdb_req_control *c,
758                                 TDB_DATA indata, 
759                                 bool *async_reply)
760 {
761         int ret;
762         struct takeover_callback_state *state;
763         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
764         struct ctdb_vnn *vnn;
765
766         /* update our vnn list */
767         vnn = find_public_ip_vnn(ctdb, &pip->addr);
768         if (vnn == NULL) {
769                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
770                         ctdb_addr_to_str(&pip->addr)));
771                 return 0;
772         }
773         vnn->pnn = pip->pnn;
774
775         /* stop any previous arps */
776         talloc_free(vnn->takeover_ctx);
777         vnn->takeover_ctx = NULL;
778
779         if (!ctdb_sys_have_ip(&pip->addr)) {
780                 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
781                         ctdb_addr_to_str(&pip->addr),
782                         vnn->public_netmask_bits, 
783                         ctdb_vnn_iface_string(vnn)));
784                 ctdb_vnn_unassign_iface(ctdb, vnn);
785                 return 0;
786         }
787
788         if (vnn->iface == NULL) {
789                 DEBUG(DEBUG_CRIT,(__location__ " release_ip of IP %s is known to the kernel, "
790                                   "but we have no interface assigned, has someone manually configured it?"
791                                   "banning ourself\n",
792                                  ctdb_addr_to_str(&vnn->public_address)));
793                 ctdb_ban_self(ctdb);
794                 return -1;
795         }
796
797         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
798                 ctdb_addr_to_str(&pip->addr),
799                 vnn->public_netmask_bits, 
800                 ctdb_vnn_iface_string(vnn),
801                 pip->pnn));
802
803         state = talloc(ctdb, struct takeover_callback_state);
804         CTDB_NO_MEMORY(ctdb, state);
805
806         state->c = talloc_steal(state, c);
807         state->addr = talloc(state, ctdb_sock_addr);       
808         CTDB_NO_MEMORY(ctdb, state->addr);
809         *state->addr = pip->addr;
810         state->vnn   = vnn;
811
812         ret = ctdb_event_script_callback(ctdb, 
813                                          state, release_ip_callback, state,
814                                          false,
815                                          CTDB_EVENT_RELEASE_IP,
816                                          "%s %s %u",
817                                          ctdb_vnn_iface_string(vnn),
818                                          ctdb_addr_to_str(&pip->addr),
819                                          vnn->public_netmask_bits);
820         if (ret != 0) {
821                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
822                         ctdb_addr_to_str(&pip->addr),
823                         ctdb_vnn_iface_string(vnn)));
824                 talloc_free(state);
825                 return -1;
826         }
827
828         /* tell the control that we will be reply asynchronously */
829         *async_reply = true;
830         return 0;
831 }
832
833 /*
834   release an ip address old v4 style
835  */
836 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
837                                 struct ctdb_req_control *c,
838                                 TDB_DATA indata, 
839                                 bool *async_reply)
840 {
841         TDB_DATA data;
842         
843         data.dsize = sizeof(struct ctdb_public_ip);
844         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
845         CTDB_NO_MEMORY(ctdb, data.dptr);
846         
847         memcpy(data.dptr, indata.dptr, indata.dsize);
848         return ctdb_control_release_ip(ctdb, c, data, async_reply);
849 }
850
851
852 static int ctdb_add_public_address(struct ctdb_context *ctdb,
853                                    ctdb_sock_addr *addr,
854                                    unsigned mask, const char *ifaces)
855 {
856         struct ctdb_vnn      *vnn;
857         uint32_t num = 0;
858         char *tmp;
859         const char *iface;
860         int i;
861         int ret;
862
863         /* Verify that we dont have an entry for this ip yet */
864         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
865                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
866                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
867                                 ctdb_addr_to_str(addr)));
868                         return -1;
869                 }               
870         }
871
872         /* create a new vnn structure for this ip address */
873         vnn = talloc_zero(ctdb, struct ctdb_vnn);
874         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
875         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
876         tmp = talloc_strdup(vnn, ifaces);
877         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
878         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
879                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
880                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
881                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
882                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
883                 num++;
884         }
885         talloc_free(tmp);
886         vnn->ifaces[num] = NULL;
887         vnn->public_address      = *addr;
888         vnn->public_netmask_bits = mask;
889         vnn->pnn                 = -1;
890
891         for (i=0; vnn->ifaces[i]; i++) {
892                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
893                 if (ret != 0) {
894                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
895                                            "for public_address[%s]\n",
896                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
897                         talloc_free(vnn);
898                         return -1;
899                 }
900         }
901
902         DLIST_ADD(ctdb->vnn, vnn);
903
904         return 0;
905 }
906
907 /*
908   setup the event script directory
909 */
910 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
911 {
912         ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
913         CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
914         return 0;
915 }
916
917 /*
918   setup the public address lists from a file
919 */
920 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
921 {
922         char **lines;
923         int nlines;
924         int i;
925
926         lines = file_lines_load(alist, &nlines, ctdb);
927         if (lines == NULL) {
928                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
929                 return -1;
930         }
931         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
932                 nlines--;
933         }
934
935         for (i=0;i<nlines;i++) {
936                 unsigned mask;
937                 ctdb_sock_addr addr;
938                 const char *addrstr;
939                 const char *ifaces;
940                 char *tok, *line;
941
942                 line = lines[i];
943                 while ((*line == ' ') || (*line == '\t')) {
944                         line++;
945                 }
946                 if (*line == '#') {
947                         continue;
948                 }
949                 if (strcmp(line, "") == 0) {
950                         continue;
951                 }
952                 tok = strtok(line, " \t");
953                 addrstr = tok;
954                 tok = strtok(NULL, " \t");
955                 if (tok == NULL) {
956                         if (NULL == ctdb->default_public_interface) {
957                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
958                                          i+1));
959                                 talloc_free(lines);
960                                 return -1;
961                         }
962                         ifaces = ctdb->default_public_interface;
963                 } else {
964                         ifaces = tok;
965                 }
966
967                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
968                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
969                         talloc_free(lines);
970                         return -1;
971                 }
972                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
973                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
974                         talloc_free(lines);
975                         return -1;
976                 }
977         }
978
979         talloc_free(lines);
980         return 0;
981 }
982
983 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
984                               const char *iface,
985                               const char *ip)
986 {
987         struct ctdb_vnn *svnn;
988         bool ok;
989         int ret;
990
991         svnn = talloc_zero(ctdb, struct ctdb_vnn);
992         CTDB_NO_MEMORY(ctdb, svnn);
993
994         svnn->ifaces = talloc_array(svnn, const char *, 2);
995         CTDB_NO_MEMORY(ctdb, svnn->ifaces);
996         svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
997         CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
998         svnn->ifaces[1] = NULL;
999
1000         ok = parse_ip(ip, iface, 0, &svnn->public_address);
1001         if (!ok) {
1002                 talloc_free(svnn);
1003                 return -1;
1004         }
1005
1006         ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1007         if (ret != 0) {
1008                 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1009                                    "for single_ip[%s]\n",
1010                                    svnn->ifaces[0],
1011                                    ctdb_addr_to_str(&svnn->public_address)));
1012                 talloc_free(svnn);
1013                 return -1;
1014         }
1015
1016         ret = ctdb_vnn_assign_iface(ctdb, svnn);
1017         if (ret != 0) {
1018                 talloc_free(svnn);
1019                 return -1;
1020         }
1021
1022         ctdb->single_ip_vnn = svnn;
1023         return 0;
1024 }
1025
1026 struct ctdb_public_ip_list {
1027         struct ctdb_public_ip_list *next;
1028         uint32_t pnn;
1029         ctdb_sock_addr addr;
1030 };
1031
1032
1033 /* Given a physical node, return the number of
1034    public addresses that is currently assigned to this node.
1035 */
1036 static int node_ip_coverage(struct ctdb_context *ctdb, 
1037         int32_t pnn,
1038         struct ctdb_public_ip_list *ips)
1039 {
1040         int num=0;
1041
1042         for (;ips;ips=ips->next) {
1043                 if (ips->pnn == pnn) {
1044                         num++;
1045                 }
1046         }
1047         return num;
1048 }
1049
1050
1051 /* Check if this is a public ip known to the node, i.e. can that
1052    node takeover this ip ?
1053 */
1054 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
1055                 struct ctdb_public_ip_list *ip)
1056 {
1057         struct ctdb_all_public_ips *public_ips;
1058         int i;
1059
1060         public_ips = ctdb->nodes[pnn]->available_public_ips;
1061
1062         if (public_ips == NULL) {
1063                 return -1;
1064         }
1065
1066         for (i=0;i<public_ips->num;i++) {
1067                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1068                         /* yes, this node can serve this public ip */
1069                         return 0;
1070                 }
1071         }
1072
1073         return -1;
1074 }
1075
1076
1077 /* search the node lists list for a node to takeover this ip.
1078    pick the node that currently are serving the least number of ips
1079    so that the ips get spread out evenly.
1080 */
1081 static int find_takeover_node(struct ctdb_context *ctdb, 
1082                 struct ctdb_node_map *nodemap, uint32_t mask, 
1083                 struct ctdb_public_ip_list *ip,
1084                 struct ctdb_public_ip_list *all_ips)
1085 {
1086         int pnn, min=0, num;
1087         int i;
1088
1089         pnn    = -1;
1090         for (i=0;i<nodemap->num;i++) {
1091                 if (nodemap->nodes[i].flags & mask) {
1092                         /* This node is not healty and can not be used to serve
1093                            a public address 
1094                         */
1095                         continue;
1096                 }
1097
1098                 /* verify that this node can serve this ip */
1099                 if (can_node_serve_ip(ctdb, i, ip)) {
1100                         /* no it couldnt   so skip to the next node */
1101                         continue;
1102                 }
1103
1104                 num = node_ip_coverage(ctdb, i, all_ips);
1105                 /* was this the first node we checked ? */
1106                 if (pnn == -1) {
1107                         pnn = i;
1108                         min  = num;
1109                 } else {
1110                         if (num < min) {
1111                                 pnn = i;
1112                                 min  = num;
1113                         }
1114                 }
1115         }       
1116         if (pnn == -1) {
1117                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1118                         ctdb_addr_to_str(&ip->addr)));
1119
1120                 return -1;
1121         }
1122
1123         ip->pnn = pnn;
1124         return 0;
1125 }
1126
1127 #define IP_KEYLEN       4
1128 static uint32_t *ip_key(ctdb_sock_addr *ip)
1129 {
1130         static uint32_t key[IP_KEYLEN];
1131
1132         bzero(key, sizeof(key));
1133
1134         switch (ip->sa.sa_family) {
1135         case AF_INET:
1136                 key[3]  = htonl(ip->ip.sin_addr.s_addr);
1137                 break;
1138         case AF_INET6:
1139                 key[0]  = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1140                 key[1]  = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1141                 key[2]  = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1142                 key[3]  = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1143                 break;
1144         default:
1145                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1146                 return key;
1147         }
1148
1149         return key;
1150 }
1151
/* insert callback handed to trbt_insertarray32_callback(): always
   returns 'parm' unchanged and ignores 'data'
*/
static void *add_ip_callback(void *parm, void *data)
{
	(void)data;

	return parm;
}
1156
1157 void getips_count_callback(void *param, void *data)
1158 {
1159         struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1160         struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1161
1162         new_ip->next = *ip_list;
1163         *ip_list     = new_ip;
1164 }
1165
1166 static struct ctdb_public_ip_list *
1167 create_merged_ip_list(struct ctdb_context *ctdb)
1168 {
1169         int i, j;
1170         struct ctdb_public_ip_list *ip_list;
1171         struct ctdb_all_public_ips *public_ips;
1172
1173         if (ctdb->ip_tree != NULL) {
1174                 talloc_free(ctdb->ip_tree);
1175                 ctdb->ip_tree = NULL;
1176         }
1177         ctdb->ip_tree = trbt_create(ctdb, 0);
1178
1179         for (i=0;i<ctdb->num_nodes;i++) {
1180                 public_ips = ctdb->nodes[i]->known_public_ips;
1181
1182                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1183                         continue;
1184                 }
1185
1186                 /* there were no public ips for this node */
1187                 if (public_ips == NULL) {
1188                         continue;
1189                 }               
1190
1191                 for (j=0;j<public_ips->num;j++) {
1192                         struct ctdb_public_ip_list *tmp_ip; 
1193
1194                         tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1195                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1196                         tmp_ip->pnn  = public_ips->ips[j].pnn;
1197                         tmp_ip->addr = public_ips->ips[j].addr;
1198                         tmp_ip->next = NULL;
1199
1200                         trbt_insertarray32_callback(ctdb->ip_tree,
1201                                 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1202                                 add_ip_callback,
1203                                 tmp_ip);
1204                 }
1205         }
1206
1207         ip_list = NULL;
1208         trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1209
1210         return ip_list;
1211 }
1212
1213 /*
1214   make any IP alias changes for public addresses that are necessary 
1215  */
1216 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1217 {
1218         int i, num_healthy, retries;
1219         struct ctdb_public_ip ip;
1220         struct ctdb_public_ipv4 ipv4;
1221         uint32_t mask;
1222         struct ctdb_public_ip_list *all_ips, *tmp_ip;
1223         int maxnode, maxnum=0, minnode, minnum=0, num;
1224         TDB_DATA data;
1225         struct timeval timeout;
1226         struct client_async_data *async_data;
1227         struct ctdb_client_control_state *state;
1228         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1229
1230
1231         ZERO_STRUCT(ip);
1232
1233         /* Count how many completely healthy nodes we have */
1234         num_healthy = 0;
1235         for (i=0;i<nodemap->num;i++) {
1236                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1237                         num_healthy++;
1238                 }
1239         }
1240
1241         if (num_healthy > 0) {
1242                 /* We have healthy nodes, so only consider them for 
1243                    serving public addresses
1244                 */
1245                 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1246         } else {
1247                 /* We didnt have any completely healthy nodes so
1248                    use "disabled" nodes as a fallback
1249                 */
1250                 mask = NODE_FLAGS_INACTIVE;
1251         }
1252
1253         /* since nodes only know about those public addresses that
1254            can be served by that particular node, no single node has
1255            a full list of all public addresses that exist in the cluster.
1256            Walk over all node structures and create a merged list of
1257            all public addresses that exist in the cluster.
1258
1259            keep the tree of ips around as ctdb->ip_tree
1260         */
1261         all_ips = create_merged_ip_list(ctdb);
1262
1263         /* If we want deterministic ip allocations, i.e. that the ip addresses
1264            will always be allocated the same way for a specific set of
1265            available/unavailable nodes.
1266         */
1267         if (1 == ctdb->tunable.deterministic_public_ips) {              
1268                 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1269                 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1270                         tmp_ip->pnn = i%nodemap->num;
1271                 }
1272         }
1273
1274
1275         /* mark all public addresses with a masked node as being served by
1276            node -1
1277         */
1278         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1279                 if (tmp_ip->pnn == -1) {
1280                         continue;
1281                 }
1282                 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1283                         tmp_ip->pnn = -1;
1284                 }
1285         }
1286
1287         /* verify that the assigned nodes can serve that public ip
1288            and set it to -1 if not
1289         */
1290         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1291                 if (tmp_ip->pnn == -1) {
1292                         continue;
1293                 }
1294                 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1295                         /* this node can not serve this ip. */
1296                         tmp_ip->pnn = -1;
1297                 }
1298         }
1299
1300
1301         /* now we must redistribute all public addresses with takeover node
1302            -1 among the nodes available
1303         */
1304         retries = 0;
1305 try_again:
1306         /* loop over all ip's and find a physical node to cover for 
1307            each unassigned ip.
1308         */
1309         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1310                 if (tmp_ip->pnn == -1) {
1311                         if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1312                                 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1313                                         ctdb_addr_to_str(&tmp_ip->addr)));
1314                         }
1315                 }
1316         }
1317
1318         /* If we dont want ips to fail back after a node becomes healthy
1319            again, we wont even try to reallocat the ip addresses so that
1320            they are evenly spread out.
1321            This can NOT be used at the same time as DeterministicIPs !
1322         */
1323         if (1 == ctdb->tunable.no_ip_failback) {
1324                 if (1 == ctdb->tunable.deterministic_public_ips) {
1325                         DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1326                 }
1327                 goto finished;
1328         }
1329
1330
1331         /* now, try to make sure the ip adresses are evenly distributed
1332            across the node.
1333            for each ip address, loop over all nodes that can serve this
1334            ip and make sure that the difference between the node
1335            serving the most and the node serving the least ip's are not greater
1336            than 1.
1337         */
1338         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1339                 if (tmp_ip->pnn == -1) {
1340                         continue;
1341                 }
1342
1343                 /* Get the highest and lowest number of ips's served by any 
1344                    valid node which can serve this ip.
1345                 */
1346                 maxnode = -1;
1347                 minnode = -1;
1348                 for (i=0;i<nodemap->num;i++) {
1349                         if (nodemap->nodes[i].flags & mask) {
1350                                 continue;
1351                         }
1352
1353                         /* only check nodes that can actually serve this ip */
1354                         if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1355                                 /* no it couldnt   so skip to the next node */
1356                                 continue;
1357                         }
1358
1359                         num = node_ip_coverage(ctdb, i, all_ips);
1360                         if (maxnode == -1) {
1361                                 maxnode = i;
1362                                 maxnum  = num;
1363                         } else {
1364                                 if (num > maxnum) {
1365                                         maxnode = i;
1366                                         maxnum  = num;
1367                                 }
1368                         }
1369                         if (minnode == -1) {
1370                                 minnode = i;
1371                                 minnum  = num;
1372                         } else {
1373                                 if (num < minnum) {
1374                                         minnode = i;
1375                                         minnum  = num;
1376                                 }
1377                         }
1378                 }
1379                 if (maxnode == -1) {
1380                         DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1381                                 ctdb_addr_to_str(&tmp_ip->addr)));
1382
1383                         continue;
1384                 }
1385
1386                 /* If we want deterministic IPs then dont try to reallocate 
1387                    them to spread out the load.
1388                 */
1389                 if (1 == ctdb->tunable.deterministic_public_ips) {
1390                         continue;
1391                 }
1392
1393                 /* if the spread between the smallest and largest coverage by
1394                    a node is >=2 we steal one of the ips from the node with
1395                    most coverage to even things out a bit.
1396                    try to do this at most 5 times  since we dont want to spend
1397                    too much time balancing the ip coverage.
1398                 */
1399                 if ( (maxnum > minnum+1)
1400                   && (retries < 5) ){
1401                         struct ctdb_public_ip_list *tmp;
1402
1403                         /* mark one of maxnode's vnn's as unassigned and try
1404                            again
1405                         */
1406                         for (tmp=all_ips;tmp;tmp=tmp->next) {
1407                                 if (tmp->pnn == maxnode) {
1408                                         tmp->pnn = -1;
1409                                         retries++;
1410                                         goto try_again;
1411                                 }
1412                         }
1413                 }
1414         }
1415
1416
1417         /* finished distributing the public addresses, now just send the 
1418            info out to the nodes
1419         */
1420 finished:
1421
1422         /* at this point ->pnn is the node which will own each IP
1423            or -1 if there is no node that can cover this ip
1424         */
1425
1426         /* now tell all nodes to delete any alias that they should not
1427            have.  This will be a NOOP on nodes that don't currently
1428            hold the given alias */
1429         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1430         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1431
1432         for (i=0;i<nodemap->num;i++) {
1433                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1434                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1435                         continue;
1436                 }
1437
1438                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1439                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1440                                 /* This node should be serving this
1441                                    vnn so dont tell it to release the ip
1442                                 */
1443                                 continue;
1444                         }
1445                         if (tmp_ip->addr.sa.sa_family == AF_INET) {
1446                                 ipv4.pnn = tmp_ip->pnn;
1447                                 ipv4.sin = tmp_ip->addr.ip;
1448
1449                                 timeout = TAKEOVER_TIMEOUT();
1450                                 data.dsize = sizeof(ipv4);
1451                                 data.dptr  = (uint8_t *)&ipv4;
1452                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1453                                                 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1454                                                 data, async_data,
1455                                                 &timeout, NULL);
1456                         } else {
1457                                 ip.pnn  = tmp_ip->pnn;
1458                                 ip.addr = tmp_ip->addr;
1459
1460                                 timeout = TAKEOVER_TIMEOUT();
1461                                 data.dsize = sizeof(ip);
1462                                 data.dptr  = (uint8_t *)&ip;
1463                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1464                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
1465                                                 data, async_data,
1466                                                 &timeout, NULL);
1467                         }
1468
1469                         if (state == NULL) {
1470                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1471                                 talloc_free(tmp_ctx);
1472                                 return -1;
1473                         }
1474                 
1475                         ctdb_client_async_add(async_data, state);
1476                 }
1477         }
1478         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1479                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1480                 talloc_free(tmp_ctx);
1481                 return -1;
1482         }
1483         talloc_free(async_data);
1484
1485
1486         /* tell all nodes to get their own IPs */
1487         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1488         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1489         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1490                 if (tmp_ip->pnn == -1) {
1491                         /* this IP won't be taken over */
1492                         continue;
1493                 }
1494
1495                 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1496                         ipv4.pnn = tmp_ip->pnn;
1497                         ipv4.sin = tmp_ip->addr.ip;
1498
1499                         timeout = TAKEOVER_TIMEOUT();
1500                         data.dsize = sizeof(ipv4);
1501                         data.dptr  = (uint8_t *)&ipv4;
1502                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1503                                         0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1504                                         data, async_data,
1505                                         &timeout, NULL);
1506                 } else {
1507                         ip.pnn  = tmp_ip->pnn;
1508                         ip.addr = tmp_ip->addr;
1509
1510                         timeout = TAKEOVER_TIMEOUT();
1511                         data.dsize = sizeof(ip);
1512                         data.dptr  = (uint8_t *)&ip;
1513                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1514                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
1515                                         data, async_data,
1516                                         &timeout, NULL);
1517                 }
1518                 if (state == NULL) {
1519                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1520                         talloc_free(tmp_ctx);
1521                         return -1;
1522                 }
1523                 
1524                 ctdb_client_async_add(async_data, state);
1525         }
1526         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1527                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1528                 talloc_free(tmp_ctx);
1529                 return -1;
1530         }
1531
1532         talloc_free(tmp_ctx);
1533         return 0;
1534 }
1535
1536
1537 /*
1538   destroy a ctdb_client_ip structure
1539  */
1540 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1541 {
1542         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1543                 ctdb_addr_to_str(&ip->addr),
1544                 ntohs(ip->addr.ip.sin_port),
1545                 ip->client_id));
1546
1547         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1548         return 0;
1549 }
1550
/*
  called by a client to inform us of a TCP connection that it is managing
  and that should be tickled with an ACK when IP takeover is done.
  we handle both the old ipv4 style of packets as well as the new ipv4/6
  pdus.
 */
1557 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1558                                 TDB_DATA indata)
1559 {
1560         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1561         struct ctdb_control_tcp *old_addr = NULL;
1562         struct ctdb_control_tcp_addr new_addr;
1563         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1564         struct ctdb_tcp_list *tcp;
1565         struct ctdb_tcp_connection t;
1566         int ret;
1567         TDB_DATA data;
1568         struct ctdb_client_ip *ip;
1569         struct ctdb_vnn *vnn;
1570         ctdb_sock_addr addr;
1571
1572         switch (indata.dsize) {
1573         case sizeof(struct ctdb_control_tcp):
1574                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1575                 ZERO_STRUCT(new_addr);
1576                 tcp_sock = &new_addr;
1577                 tcp_sock->src.ip  = old_addr->src;
1578                 tcp_sock->dest.ip = old_addr->dest;
1579                 break;
1580         case sizeof(struct ctdb_control_tcp_addr):
1581                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1582                 break;
1583         default:
1584                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1585                                  "to ctdb_control_tcp_client. size was %d but "
1586                                  "only allowed sizes are %lu and %lu\n",
1587                                  (int)indata.dsize,
1588                                  (long unsigned)sizeof(struct ctdb_control_tcp),
1589                                  (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1590                 return -1;
1591         }
1592
1593         addr = tcp_sock->src;
1594         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1595         addr = tcp_sock->dest;
1596         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1597
1598         ZERO_STRUCT(addr);
1599         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1600         vnn = find_public_ip_vnn(ctdb, &addr);
1601         if (vnn == NULL) {
1602                 switch (addr.sa.sa_family) {
1603                 case AF_INET:
1604                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1605                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1606                                         ctdb_addr_to_str(&addr)));
1607                         }
1608                         break;
1609                 case AF_INET6:
1610                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1611                                 ctdb_addr_to_str(&addr)));
1612                         break;
1613                 default:
1614                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1615                 }
1616
1617                 return 0;
1618         }
1619
1620         if (vnn->pnn != ctdb->pnn) {
1621                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1622                         ctdb_addr_to_str(&addr),
1623                         client_id, client->pid));
1624                 /* failing this call will tell smbd to die */
1625                 return -1;
1626         }
1627
1628         ip = talloc(client, struct ctdb_client_ip);
1629         CTDB_NO_MEMORY(ctdb, ip);
1630
1631         ip->ctdb      = ctdb;
1632         ip->addr      = addr;
1633         ip->client_id = client_id;
1634         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1635         DLIST_ADD(ctdb->client_ip_list, ip);
1636
1637         tcp = talloc(client, struct ctdb_tcp_list);
1638         CTDB_NO_MEMORY(ctdb, tcp);
1639
1640         tcp->connection.src_addr = tcp_sock->src;
1641         tcp->connection.dst_addr = tcp_sock->dest;
1642
1643         DLIST_ADD(client->tcp_list, tcp);
1644
1645         t.src_addr = tcp_sock->src;
1646         t.dst_addr = tcp_sock->dest;
1647
1648         data.dptr = (uint8_t *)&t;
1649         data.dsize = sizeof(t);
1650
1651         switch (addr.sa.sa_family) {
1652         case AF_INET:
1653                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1654                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1655                         ctdb_addr_to_str(&tcp_sock->src),
1656                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1657                 break;
1658         case AF_INET6:
1659                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1660                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1661                         ctdb_addr_to_str(&tcp_sock->src),
1662                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1663                 break;
1664         default:
1665                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1666         }
1667
1668
1669         /* tell all nodes about this tcp connection */
1670         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1671                                        CTDB_CONTROL_TCP_ADD,
1672                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1673         if (ret != 0) {
1674                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1675                 return -1;
1676         }
1677
1678         return 0;
1679 }
1680
1681 /*
1682   find a tcp address on a list
1683  */
1684 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1685                                            struct ctdb_tcp_connection *tcp)
1686 {
1687         int i;
1688
1689         if (array == NULL) {
1690                 return NULL;
1691         }
1692
1693         for (i=0;i<array->num;i++) {
1694                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1695                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1696                         return &array->connections[i];
1697                 }
1698         }
1699         return NULL;
1700 }
1701
1702
1703
1704 /*
1705   called by a daemon to inform us of a TCP connection that one of its
1706   clients managing that should tickled with an ACK when IP takeover is
1707   done
1708  */
1709 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1710 {
1711         struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
1712         struct ctdb_tcp_array *tcparray;
1713         struct ctdb_tcp_connection tcp;
1714         struct ctdb_vnn *vnn;
1715
1716         vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
1717         if (vnn == NULL) {
1718                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1719                         ctdb_addr_to_str(&p->dst_addr)));
1720
1721                 return -1;
1722         }
1723
1724
1725         tcparray = vnn->tcp_array;
1726
1727         /* If this is the first tickle */
1728         if (tcparray == NULL) {
1729                 tcparray = talloc_size(ctdb->nodes, 
1730                         offsetof(struct ctdb_tcp_array, connections) +
1731                         sizeof(struct ctdb_tcp_connection) * 1);
1732                 CTDB_NO_MEMORY(ctdb, tcparray);
1733                 vnn->tcp_array = tcparray;
1734
1735                 tcparray->num = 0;
1736                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1737                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1738
1739                 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1740                 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1741                 tcparray->num++;
1742
1743                 if (tcp_update_needed) {
1744                         vnn->tcp_update_needed = true;
1745                 }
1746                 return 0;
1747         }
1748
1749
1750         /* Do we already have this tickle ?*/
1751         tcp.src_addr = p->src_addr;
1752         tcp.dst_addr = p->dst_addr;
1753         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1754                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1755                         ctdb_addr_to_str(&tcp.dst_addr),
1756                         ntohs(tcp.dst_addr.ip.sin_port),
1757                         vnn->pnn));
1758                 return 0;
1759         }
1760
1761         /* A new tickle, we must add it to the array */
1762         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1763                                         struct ctdb_tcp_connection,
1764                                         tcparray->num+1);
1765         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1766
1767         vnn->tcp_array = tcparray;
1768         tcparray->connections[tcparray->num].src_addr = p->src_addr;
1769         tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1770         tcparray->num++;
1771                                 
1772         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1773                 ctdb_addr_to_str(&tcp.dst_addr),
1774                 ntohs(tcp.dst_addr.ip.sin_port),
1775                 vnn->pnn));
1776
1777         if (tcp_update_needed) {
1778                 vnn->tcp_update_needed = true;
1779         }
1780
1781         return 0;
1782 }
1783
1784
1785 /*
1786   called by a daemon to inform us of a TCP connection that one of its
1787   clients managing that should tickled with an ACK when IP takeover is
1788   done
1789  */
1790 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1791 {
1792         struct ctdb_tcp_connection *tcpp;
1793         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1794
1795         if (vnn == NULL) {
1796                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1797                         ctdb_addr_to_str(&conn->dst_addr)));
1798                 return;
1799         }
1800
1801         /* if the array is empty we cant remove it
1802            and we dont need to do anything
1803          */
1804         if (vnn->tcp_array == NULL) {
1805                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1806                         ctdb_addr_to_str(&conn->dst_addr),
1807                         ntohs(conn->dst_addr.ip.sin_port)));
1808                 return;
1809         }
1810
1811
1812         /* See if we know this connection
1813            if we dont know this connection  then we dont need to do anything
1814          */
1815         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1816         if (tcpp == NULL) {
1817                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1818                         ctdb_addr_to_str(&conn->dst_addr),
1819                         ntohs(conn->dst_addr.ip.sin_port)));
1820                 return;
1821         }
1822
1823
1824         /* We need to remove this entry from the array.
1825            Instead of allocating a new array and copying data to it
1826            we cheat and just copy the last entry in the existing array
1827            to the entry that is to be removed and just shring the 
1828            ->num field
1829          */
1830         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1831         vnn->tcp_array->num--;
1832
1833         /* If we deleted the last entry we also need to remove the entire array
1834          */
1835         if (vnn->tcp_array->num == 0) {
1836                 talloc_free(vnn->tcp_array);
1837                 vnn->tcp_array = NULL;
1838         }               
1839
1840         vnn->tcp_update_needed = true;
1841
1842         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1843                 ctdb_addr_to_str(&conn->src_addr),
1844                 ntohs(conn->src_addr.ip.sin_port)));
1845 }
1846
1847
1848 /*
1849   called by a daemon to inform us of a TCP connection that one of its
1850   clients used are no longer needed in the tickle database
1851  */
1852 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1853 {
1854         struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
1855
1856         ctdb_remove_tcp_connection(ctdb, conn);
1857
1858         return 0;
1859 }
1860
1861
/*
  called when a daemon restarts

  The intention is to send all tickles for all public addresses we are
  serving to the new node immediately.  That is not implemented yet:
  both arguments are ignored and 0 is returned.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
        /* XXX here we should send all tickles we are serving to the new node */
        return 0;
}
1871
1872
1873 /*
1874   called when a client structure goes away - hook to remove
1875   elements from the tcp_list in all daemons
1876  */
1877 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1878 {
1879         while (client->tcp_list) {
1880                 struct ctdb_tcp_list *tcp = client->tcp_list;
1881                 DLIST_REMOVE(client->tcp_list, tcp);
1882                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1883         }
1884 }
1885
1886
1887 /*
1888   release all IPs on shutdown
1889  */
1890 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1891 {
1892         struct ctdb_vnn *vnn;
1893
1894         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1895                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1896                         ctdb_vnn_unassign_iface(ctdb, vnn);
1897                         continue;
1898                 }
1899                 if (!vnn->iface) {
1900                         continue;
1901                 }
1902                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1903                                   ctdb_vnn_iface_string(vnn),
1904                                   ctdb_addr_to_str(&vnn->public_address),
1905                                   vnn->public_netmask_bits);
1906                 release_kill_clients(ctdb, &vnn->public_address);
1907                 ctdb_vnn_unassign_iface(ctdb, vnn);
1908         }
1909 }
1910
1911
1912 /*
1913   get list of public IPs
1914  */
1915 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1916                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1917 {
1918         int i, num, len;
1919         struct ctdb_all_public_ips *ips;
1920         struct ctdb_vnn *vnn;
1921         bool only_available = false;
1922
1923         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1924                 only_available = true;
1925         }
1926
1927         /* count how many public ip structures we have */
1928         num = 0;
1929         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1930                 num++;
1931         }
1932
1933         len = offsetof(struct ctdb_all_public_ips, ips) + 
1934                 num*sizeof(struct ctdb_public_ip);
1935         ips = talloc_zero_size(outdata, len);
1936         CTDB_NO_MEMORY(ctdb, ips);
1937
1938         i = 0;
1939         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1940                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1941                         continue;
1942                 }
1943                 ips->ips[i].pnn  = vnn->pnn;
1944                 ips->ips[i].addr = vnn->public_address;
1945                 i++;
1946         }
1947         ips->num = i;
1948         len = offsetof(struct ctdb_all_public_ips, ips) +
1949                 i*sizeof(struct ctdb_public_ip);
1950
1951         outdata->dsize = len;
1952         outdata->dptr  = (uint8_t *)ips;
1953
1954         return 0;
1955 }
1956
1957
1958 /*
1959   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
1960  */
1961 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
1962                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1963 {
1964         int i, num, len;
1965         struct ctdb_all_public_ipsv4 *ips;
1966         struct ctdb_vnn *vnn;
1967
1968         /* count how many public ip structures we have */
1969         num = 0;
1970         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1971                 if (vnn->public_address.sa.sa_family != AF_INET) {
1972                         continue;
1973                 }
1974                 num++;
1975         }
1976
1977         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
1978                 num*sizeof(struct ctdb_public_ipv4);
1979         ips = talloc_zero_size(outdata, len);
1980         CTDB_NO_MEMORY(ctdb, ips);
1981
1982         outdata->dsize = len;
1983         outdata->dptr  = (uint8_t *)ips;
1984
1985         ips->num = num;
1986         i = 0;
1987         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1988                 if (vnn->public_address.sa.sa_family != AF_INET) {
1989                         continue;
1990                 }
1991                 ips->ips[i].pnn = vnn->pnn;
1992                 ips->ips[i].sin = vnn->public_address.ip;
1993                 i++;
1994         }
1995
1996         return 0;
1997 }
1998
1999 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2000                                         struct ctdb_req_control *c,
2001                                         TDB_DATA indata,
2002                                         TDB_DATA *outdata)
2003 {
2004         int i, num, len;
2005         ctdb_sock_addr *addr;
2006         struct ctdb_control_public_ip_info *info;
2007         struct ctdb_vnn *vnn;
2008
2009         addr = (ctdb_sock_addr *)indata.dptr;
2010
2011         vnn = find_public_ip_vnn(ctdb, addr);
2012         if (vnn == NULL) {
2013                 /* if it is not a public ip   it could be our 'single ip' */
2014                 if (ctdb->single_ip_vnn) {
2015                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2016                                 vnn = ctdb->single_ip_vnn;
2017                         }
2018                 }
2019         }
2020         if (vnn == NULL) {
2021                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2022                                  "'%s'not a public address\n",
2023                                  ctdb_addr_to_str(addr)));
2024                 return -1;
2025         }
2026
2027         /* count how many public ip structures we have */
2028         num = 0;
2029         for (;vnn->ifaces[num];) {
2030                 num++;
2031         }
2032
2033         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2034                 num*sizeof(struct ctdb_control_iface_info);
2035         info = talloc_zero_size(outdata, len);
2036         CTDB_NO_MEMORY(ctdb, info);
2037
2038         info->ip.addr = vnn->public_address;
2039         info->ip.pnn = vnn->pnn;
2040         info->active_idx = 0xFFFFFFFF;
2041
2042         for (i=0; vnn->ifaces[i]; i++) {
2043                 struct ctdb_iface *cur;
2044
2045                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2046                 if (cur == NULL) {
2047                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2048                                            vnn->ifaces[i]));
2049                         return -1;
2050                 }
2051                 if (vnn->iface == cur) {
2052                         info->active_idx = i;
2053                 }
2054                 strcpy(info->ifaces[i].name, cur->name);
2055                 info->ifaces[i].link_state = cur->link_up;
2056                 info->ifaces[i].references = cur->references;
2057         }
2058         info->num = i;
2059         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2060                 i*sizeof(struct ctdb_control_iface_info);
2061
2062         outdata->dsize = len;
2063         outdata->dptr  = (uint8_t *)info;
2064
2065         return 0;
2066 }
2067
2068 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2069                                 struct ctdb_req_control *c,
2070                                 TDB_DATA *outdata)
2071 {
2072         int i, num, len;
2073         struct ctdb_control_get_ifaces *ifaces;
2074         struct ctdb_iface *cur;
2075
2076         /* count how many public ip structures we have */
2077         num = 0;
2078         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2079                 num++;
2080         }
2081
2082         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2083                 num*sizeof(struct ctdb_control_iface_info);
2084         ifaces = talloc_zero_size(outdata, len);
2085         CTDB_NO_MEMORY(ctdb, ifaces);
2086
2087         i = 0;
2088         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2089                 strcpy(ifaces->ifaces[i].name, cur->name);
2090                 ifaces->ifaces[i].link_state = cur->link_up;
2091                 ifaces->ifaces[i].references = cur->references;
2092                 i++;
2093         }
2094         ifaces->num = i;
2095         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2096                 i*sizeof(struct ctdb_control_iface_info);
2097
2098         outdata->dsize = len;
2099         outdata->dptr  = (uint8_t *)ifaces;
2100
2101         return 0;
2102 }
2103
2104 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2105                                     struct ctdb_req_control *c,
2106                                     TDB_DATA indata)
2107 {
2108         struct ctdb_control_iface_info *info;
2109         struct ctdb_iface *iface;
2110         bool link_up = false;
2111
2112         info = (struct ctdb_control_iface_info *)indata.dptr;
2113
2114         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2115                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2116                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2117                                   len, len, info->name));
2118                 return -1;
2119         }
2120
2121         switch (info->link_state) {
2122         case 0:
2123                 link_up = false;
2124                 break;
2125         case 1:
2126                 link_up = true;
2127                 break;
2128         default:
2129                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2130                                   (unsigned int)info->link_state));
2131                 return -1;
2132         }
2133
2134         if (info->references != 0) {
2135                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2136                                   (unsigned int)info->references));
2137                 return -1;
2138         }
2139
2140         iface = ctdb_find_iface(ctdb, info->name);
2141         if (iface == NULL) {
2142                 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2143                                   info->name));
2144                 return -1;
2145         }
2146
2147         if (link_up == iface->link_up) {
2148                 return 0;
2149         }
2150
2151         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2152               ("iface[%s] has changed it's link status %s => %s\n",
2153                iface->name,
2154                iface->link_up?"up":"down",
2155                link_up?"up":"down"));
2156
2157         iface->link_up = link_up;
2158         return 0;
2159 }
2160
2161
/* 
   structure containing the listening socket and the list of tcp connections
   that the ctdb daemon is to kill
*/
struct ctdb_kill_tcp {
        /* public address this kill list is attached to; the destructor
           clears vnn->killtcp */
        struct ctdb_vnn *vnn;
        /* daemon context, used to schedule the tickle timer */
        struct ctdb_context *ctdb;
        /* capture socket from ctdb_sys_open_capture_socket(), -1 until opened */
        int capture_fd;
        /* fd event that runs capture_tcp_handler() on capture_fd */
        struct fd_event *fde;
        /* tree of struct ctdb_killtcp_con, keyed by killtcp_key() */
        trbt_tree_t *connections;
        /* opaque value filled in by ctdb_sys_open_capture_socket() and
           handed to ctdb_sys_read_tcp_packet() */
        void *private_data;
};
2174
/*
  a tcp connection that is to be killed
 */
struct ctdb_killtcp_con {
        /* both addresses are stored after ctdb_canonicalize_ip() */
        ctdb_sock_addr src_addr;
        ctdb_sock_addr dst_addr;
        /* number of tickles re-sent by the periodic timer; the connection
           is given up once this reaches 5 */
        int count;
        /* the kill list this connection belongs to */
        struct ctdb_kill_tcp *killtcp;
};
2184
2185 /* this function is used to create a key to represent this socketpair
2186    in the killtcp tree.
2187    this key is used to insert and lookup matching socketpairs that are
2188    to be tickled and RST
2189 */
2190 #define KILLTCP_KEYLEN  10
2191 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2192 {
2193         static uint32_t key[KILLTCP_KEYLEN];
2194
2195         bzero(key, sizeof(key));
2196
2197         if (src->sa.sa_family != dst->sa.sa_family) {
2198                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2199                 return key;
2200         }
2201         
2202         switch (src->sa.sa_family) {
2203         case AF_INET:
2204                 key[0]  = dst->ip.sin_addr.s_addr;
2205                 key[1]  = src->ip.sin_addr.s_addr;
2206                 key[2]  = dst->ip.sin_port;
2207                 key[3]  = src->ip.sin_port;
2208                 break;
2209         case AF_INET6:
2210                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
2211                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
2212                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
2213                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
2214                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
2215                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
2216                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
2217                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
2218                 key[8]  = dst->ip6.sin6_port;
2219                 key[9]  = src->ip6.sin6_port;
2220                 break;
2221         default:
2222                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2223                 return key;
2224         }
2225
2226         return key;
2227 }
2228
2229 /*
2230   called when we get a read event on the raw socket
2231  */
2232 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
2233                                 uint16_t flags, void *private_data)
2234 {
2235         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2236         struct ctdb_killtcp_con *con;
2237         ctdb_sock_addr src, dst;
2238         uint32_t ack_seq, seq;
2239
2240         if (!(flags & EVENT_FD_READ)) {
2241                 return;
2242         }
2243
2244         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2245                                 killtcp->private_data,
2246                                 &src, &dst,
2247                                 &ack_seq, &seq) != 0) {
2248                 /* probably a non-tcp ACK packet */
2249                 return;
2250         }
2251
2252         /* check if we have this guy in our list of connections
2253            to kill
2254         */
2255         con = trbt_lookuparray32(killtcp->connections, 
2256                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2257         if (con == NULL) {
2258                 /* no this was some other packet we can just ignore */
2259                 return;
2260         }
2261
2262         /* This one has been tickled !
2263            now reset him and remove him from the list.
2264          */
2265         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2266                 ntohs(con->dst_addr.ip.sin_port),
2267                 ctdb_addr_to_str(&con->src_addr),
2268                 ntohs(con->src_addr.ip.sin_port)));
2269
2270         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2271         talloc_free(con);
2272 }
2273
2274
2275 /* when traversing the list of all tcp connections to send tickle acks to
2276    (so that we can capture the ack coming back and kill the connection
2277     by a RST)
2278    this callback is called for each connection we are currently trying to kill
2279 */
2280 static void tickle_connection_traverse(void *param, void *data)
2281 {
2282         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2283
2284         /* have tried too many times, just give up */
2285         if (con->count >= 5) {
2286                 talloc_free(con);
2287                 return;
2288         }
2289
2290         /* othervise, try tickling it again */
2291         con->count++;
2292         ctdb_sys_send_tcp(
2293                 (ctdb_sock_addr *)&con->dst_addr,
2294                 (ctdb_sock_addr *)&con->src_addr,
2295                 0, 0, 0);
2296 }
2297
2298
2299 /* 
2300    called every second until all sentenced connections have been reset
2301  */
2302 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
2303                                               struct timeval t, void *private_data)
2304 {
2305         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2306
2307
2308         /* loop over all connections sending tickle ACKs */
2309         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
2310
2311
2312         /* If there are no more connections to kill we can remove the
2313            entire killtcp structure
2314          */
2315         if ( (killtcp->connections == NULL) || 
2316              (killtcp->connections->root == NULL) ) {
2317                 talloc_free(killtcp);
2318                 return;
2319         }
2320
2321         /* try tickling them again in a seconds time
2322          */
2323         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2324                         ctdb_tickle_sentenced_connections, killtcp);
2325 }
2326
2327 /*
2328   destroy the killtcp structure
2329  */
2330 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2331 {
2332         if (killtcp->vnn) {
2333                 killtcp->vnn->killtcp = NULL;
2334         }
2335         return 0;
2336 }
2337
2338
/* insert callback for the killtcp tree: whatever is already stored under
   the key ('data') is ignored and the new connection structure ('parm')
   is returned unconditionally.

   the old structure, if there was one, is deliberately not freed here:
   it is talloc_stolen by the same node in the tree and will be deleted
   when the new data is deleted
*/
static void *add_killtcp_callback(void *parm, void *data)
{
        void *replacement = parm;

        return replacement;
}
2350
2351 /*
2352   add a tcp socket to the list of connections we want to RST
2353  */
2354 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
2355                                        ctdb_sock_addr *s,
2356                                        ctdb_sock_addr *d)
2357 {
2358         ctdb_sock_addr src, dst;
2359         struct ctdb_kill_tcp *killtcp;
2360         struct ctdb_killtcp_con *con;
2361         struct ctdb_vnn *vnn;
2362
2363         ctdb_canonicalize_ip(s, &src);
2364         ctdb_canonicalize_ip(d, &dst);
2365
2366         vnn = find_public_ip_vnn(ctdb, &dst);
2367         if (vnn == NULL) {
2368                 vnn = find_public_ip_vnn(ctdb, &src);
2369         }
2370         if (vnn == NULL) {
2371                 /* if it is not a public ip   it could be our 'single ip' */
2372                 if (ctdb->single_ip_vnn) {
2373                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2374                                 vnn = ctdb->single_ip_vnn;
2375                         }
2376                 }
2377         }
2378         if (vnn == NULL) {
2379                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
2380                 return -1;
2381         }
2382
2383         killtcp = vnn->killtcp;
2384         
2385         /* If this is the first connection to kill we must allocate
2386            a new structure
2387          */
2388         if (killtcp == NULL) {
2389                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2390                 CTDB_NO_MEMORY(ctdb, killtcp);
2391
2392                 killtcp->vnn         = vnn;
2393                 killtcp->ctdb        = ctdb;
2394                 killtcp->capture_fd  = -1;
2395                 killtcp->connections = trbt_create(killtcp, 0);
2396
2397                 vnn->killtcp         = killtcp;
2398                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2399         }
2400
2401
2402
2403         /* create a structure that describes this connection we want to
2404            RST and store it in killtcp->connections
2405         */
2406         con = talloc(killtcp, struct ctdb_killtcp_con);
2407         CTDB_NO_MEMORY(ctdb, con);
2408         con->src_addr = src;
2409         con->dst_addr = dst;
2410         con->count    = 0;
2411         con->killtcp  = killtcp;
2412
2413
2414         trbt_insertarray32_callback(killtcp->connections,
2415                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2416                         add_killtcp_callback, con);
2417
2418         /* 
2419            If we dont have a socket to listen on yet we must create it
2420          */
2421         if (killtcp->capture_fd == -1) {
2422                 const char *iface = ctdb_vnn_iface_string(vnn);
2423                 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2424                 if (killtcp->capture_fd == -1) {
2425                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2426                                           "socket on iface '%s' for killtcp (%s)\n",
2427                                           iface, strerror(errno)));
2428                         goto failed;
2429                 }
2430         }
2431
2432
2433         if (killtcp->fde == NULL) {
2434                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
2435                                             EVENT_FD_READ,
2436                                             capture_tcp_handler, killtcp);
2437                 tevent_fd_set_auto_close(killtcp->fde);
2438
2439                 /* We also need to set up some events to tickle all these connections
2440                    until they are all reset
2441                 */
2442                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2443                                 ctdb_tickle_sentenced_connections, killtcp);
2444         }
2445
2446         /* tickle him once now */
2447         ctdb_sys_send_tcp(
2448                 &con->dst_addr,
2449                 &con->src_addr,
2450                 0, 0, 0);
2451
2452         return 0;
2453
2454 failed:
2455         talloc_free(vnn->killtcp);
2456         vnn->killtcp = NULL;
2457         return -1;
2458 }
2459
2460 /*
2461   kill a TCP connection.
2462  */
2463 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2464 {
2465         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2466
2467         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2468 }
2469
2470 /*
2471   called by a daemon to inform us of the entire list of TCP tickles for
2472   a particular public address.
2473   this control should only be sent by the node that is currently serving
2474   that public address.
2475  */
2476 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2477 {
2478         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2479         struct ctdb_tcp_array *tcparray;
2480         struct ctdb_vnn *vnn;
2481
2482         /* We must at least have tickles.num or else we cant verify the size
2483            of the received data blob
2484          */
2485         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2486                                         tickles.connections)) {
2487                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2488                 return -1;
2489         }
2490
2491         /* verify that the size of data matches what we expect */
2492         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2493                                 tickles.connections)
2494                          + sizeof(struct ctdb_tcp_connection)
2495                                  * list->tickles.num) {
2496                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2497                 return -1;
2498         }       
2499
2500         vnn = find_public_ip_vnn(ctdb, &list->addr);
2501         if (vnn == NULL) {
2502                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
2503                         ctdb_addr_to_str(&list->addr)));
2504
2505                 return 1;
2506         }
2507
2508         /* remove any old ticklelist we might have */
2509         talloc_free(vnn->tcp_array);
2510         vnn->tcp_array = NULL;
2511
2512         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2513         CTDB_NO_MEMORY(ctdb, tcparray);
2514
2515         tcparray->num = list->tickles.num;
2516
2517         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2518         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2519
2520         memcpy(tcparray->connections, &list->tickles.connections[0], 
2521                sizeof(struct ctdb_tcp_connection)*tcparray->num);
2522
2523         /* We now have a new fresh tickle list array for this vnn */
2524         vnn->tcp_array = talloc_steal(vnn, tcparray);
2525         
2526         return 0;
2527 }
2528
2529 /*
2530   called to return the full list of tickles for the puclic address associated 
2531   with the provided vnn
2532  */
2533 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2534 {
2535         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2536         struct ctdb_control_tcp_tickle_list *list;
2537         struct ctdb_tcp_array *tcparray;
2538         int num;
2539         struct ctdb_vnn *vnn;
2540
2541         vnn = find_public_ip_vnn(ctdb, addr);
2542         if (vnn == NULL) {
2543                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
2544                         ctdb_addr_to_str(addr)));
2545
2546                 return 1;
2547         }
2548
2549         tcparray = vnn->tcp_array;
2550         if (tcparray) {
2551                 num = tcparray->num;
2552         } else {
2553                 num = 0;
2554         }
2555
2556         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2557                                 tickles.connections)
2558                         + sizeof(struct ctdb_tcp_connection) * num;
2559
2560         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2561         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2562         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2563
2564         list->addr = *addr;
2565         list->tickles.num = num;
2566         if (num) {
2567                 memcpy(&list->tickles.connections[0], tcparray->connections, 
2568                         sizeof(struct ctdb_tcp_connection) * num);
2569         }
2570
2571         return 0;
2572 }
2573
2574
2575 /*
2576   set the list of all tcp tickles for a public address
2577  */
2578 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
2579                               struct timeval timeout, uint32_t destnode, 
2580                               ctdb_sock_addr *addr,
2581                               struct ctdb_tcp_array *tcparray)
2582 {
2583         int ret, num;
2584         TDB_DATA data;
2585         struct ctdb_control_tcp_tickle_list *list;
2586
2587         if (tcparray) {
2588                 num = tcparray->num;
2589         } else {
2590                 num = 0;
2591         }
2592
2593         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2594                                 tickles.connections) +
2595                         sizeof(struct ctdb_tcp_connection) * num;
2596         data.dptr = talloc_size(ctdb, data.dsize);
2597         CTDB_NO_MEMORY(ctdb, data.dptr);
2598
2599         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2600         list->addr = *addr;
2601         list->tickles.num = num;
2602         if (tcparray) {
2603                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2604         }
2605
2606         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
2607                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2608                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2609         if (ret != 0) {
2610                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2611                 return -1;
2612         }
2613
2614         talloc_free(data.dptr);
2615
2616         return ret;
2617 }
2618
2619
2620 /*
2621   perform tickle updates if required
2622  */
2623 static void ctdb_update_tcp_tickles(struct event_context *ev, 
2624                                 struct timed_event *te, 
2625                                 struct timeval t, void *private_data)
2626 {
2627         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2628         int ret;
2629         struct ctdb_vnn *vnn;
2630
2631         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2632                 /* we only send out updates for public addresses that 
2633                    we have taken over
2634                  */
2635                 if (ctdb->pnn != vnn->pnn) {
2636                         continue;
2637                 }
2638                 /* We only send out the updates if we need to */
2639                 if (!vnn->tcp_update_needed) {
2640                         continue;
2641                 }
2642                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
2643                                 TAKEOVER_TIMEOUT(),
2644                                 CTDB_BROADCAST_CONNECTED,
2645                                 &vnn->public_address,
2646                                 vnn->tcp_array);
2647                 if (ret != 0) {
2648                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2649                                 ctdb_addr_to_str(&vnn->public_address)));
2650                 }
2651         }
2652
2653         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2654                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2655                              ctdb_update_tcp_tickles, ctdb);
2656 }               
2657         
2658
2659 /*
2660   start periodic update of tcp tickles
2661  */
2662 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2663 {
2664         ctdb->tickle_update_context = talloc_new(ctdb);
2665
2666         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2667                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2668                              ctdb_update_tcp_tickles, ctdb);
2669 }
2670
2671
2672
2673
2674 struct control_gratious_arp {
2675         struct ctdb_context *ctdb;
2676         ctdb_sock_addr addr;
2677         const char *iface;
2678         int count;
2679 };
2680
2681 /*
2682   send a control_gratuitous arp
2683  */
2684 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
2685                                   struct timeval t, void *private_data)
2686 {
2687         int ret;
2688         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2689                                                         struct control_gratious_arp);
2690
2691         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2692         if (ret != 0) {
2693                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2694                                  arp->iface, strerror(errno)));
2695         }
2696
2697
2698         arp->count++;
2699         if (arp->count == CTDB_ARP_REPEAT) {
2700                 talloc_free(arp);
2701                 return;
2702         }
2703
2704         event_add_timed(arp->ctdb->ev, arp, 
2705                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
2706                         send_gratious_arp, arp);
2707 }
2708
2709
2710 /*
2711   send a gratious arp 
2712  */
2713 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2714 {
2715         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2716         struct control_gratious_arp *arp;
2717
2718         /* verify the size of indata */
2719         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2720                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2721                                  (unsigned)indata.dsize, 
2722                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2723                 return -1;
2724         }
2725         if (indata.dsize != 
2726                 ( offsetof(struct ctdb_control_gratious_arp, iface)
2727                 + gratious_arp->len ) ){
2728
2729                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2730                         "but should be %u bytes\n", 
2731                          (unsigned)indata.dsize, 
2732                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2733                 return -1;
2734         }
2735
2736
2737         arp = talloc(ctdb, struct control_gratious_arp);
2738         CTDB_NO_MEMORY(ctdb, arp);
2739
2740         arp->ctdb  = ctdb;
2741         arp->addr   = gratious_arp->addr;
2742         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2743         CTDB_NO_MEMORY(ctdb, arp->iface);
2744         arp->count = 0;
2745         
2746         event_add_timed(arp->ctdb->ev, arp, 
2747                         timeval_zero(), send_gratious_arp, arp);
2748
2749         return 0;
2750 }
2751
2752 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2753 {
2754         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2755         int ret;
2756
2757         /* verify the size of indata */
2758         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2759                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2760                 return -1;
2761         }
2762         if (indata.dsize != 
2763                 ( offsetof(struct ctdb_control_ip_iface, iface)
2764                 + pub->len ) ){
2765
2766                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2767                         "but should be %u bytes\n", 
2768                          (unsigned)indata.dsize, 
2769                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2770                 return -1;
2771         }
2772
2773         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2774
2775         if (ret != 0) {
2776                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2777                 return -1;
2778         }
2779
2780         return 0;
2781 }
2782
2783 /*
2784   called when releaseip event finishes for del_public_address
2785  */
2786 static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
2787                                 void *private_data)
2788 {
2789         talloc_free(private_data);
2790 }
2791
2792 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2793 {
2794         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2795         struct ctdb_vnn *vnn;
2796         int ret;
2797
2798         /* verify the size of indata */
2799         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2800                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2801                 return -1;
2802         }
2803         if (indata.dsize != 
2804                 ( offsetof(struct ctdb_control_ip_iface, iface)
2805                 + pub->len ) ){
2806
2807                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2808                         "but should be %u bytes\n", 
2809                          (unsigned)indata.dsize, 
2810                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2811                 return -1;
2812         }
2813
2814         /* walk over all public addresses until we find a match */
2815         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2816                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2817                         TALLOC_CTX *mem_ctx;
2818
2819                         DLIST_REMOVE(ctdb->vnn, vnn);
2820                         if (vnn->iface == NULL) {
2821                                 talloc_free(vnn);
2822                                 return 0;
2823                         }
2824
2825                         mem_ctx = talloc_new(ctdb);
2826                         ret = ctdb_event_script_callback(ctdb, 
2827                                          mem_ctx, delete_ip_callback, mem_ctx,
2828                                          false,
2829                                          CTDB_EVENT_RELEASE_IP,
2830                                          "%s %s %u",
2831                                          ctdb_vnn_iface_string(vnn),
2832                                          ctdb_addr_to_str(&vnn->public_address),
2833                                          vnn->public_netmask_bits);
2834                         ctdb_vnn_unassign_iface(ctdb, vnn);
2835                         talloc_free(vnn);
2836                         if (ret != 0) {
2837                                 return -1;
2838                         }
2839                         return 0;
2840                 }
2841         }
2842
2843         return -1;
2844 }
2845
2846 /* This function is called from the recovery daemon to verify that a remote
2847    node has the expected ip allocation.
2848    This is verified against ctdb->ip_tree
2849 */
2850 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2851 {
2852         struct ctdb_public_ip_list *tmp_ip; 
2853         int i;
2854
2855         if (ctdb->ip_tree == NULL) {
2856                 /* dont know the expected allocation yet, assume remote node
2857                    is correct. */
2858                 return 0;
2859         }
2860
2861         if (ips == NULL) {
2862                 return 0;
2863         }
2864
2865         for (i=0; i<ips->num; i++) {
2866                 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2867                 if (tmp_ip == NULL) {
2868                         DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2869                         return -1;
2870                 }
2871
2872                 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2873                         continue;
2874                 }
2875
2876                 if (tmp_ip->pnn != ips->ips[i].pnn) {
2877                         DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
2878                         return -1;
2879                 }
2880         }
2881
2882         return 0;
2883 }
2884
2885 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2886 {
2887         struct ctdb_public_ip_list *tmp_ip; 
2888
2889         if (ctdb->ip_tree == NULL) {
2890                 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2891                 return -1;
2892         }
2893
2894         tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2895         if (tmp_ip == NULL) {
2896                 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
2897                 return -1;
2898         }
2899
2900         DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2901         tmp_ip->pnn = ip->pnn;
2902
2903         return 0;
2904 }